/* Copyright (c) 2014, 2022, Oracle and/or its affiliates.

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License, version 2.0,
   as published by the Free Software Foundation.

   This program is also distributed with certain software (including
   but not limited to OpenSSL) that is licensed under separate terms,
   as designated in a particular file or component or in included license
   documentation.  The authors of MySQL hereby grant you an additional
   permission to link the program and your derivative works with the
   separately licensed software that they have included with MySQL.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License, version 2.0, for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301  USA */

/*
  To run the benchmarks, configure in optimized mode and build the target
    strings_strnxfrm-t
  which is defined, but not built by default. Then run it with:
    ./bin/strings_strnxfrm-t --gtest_filter='Microbenchmarks*'
 */

#include <gtest/gtest.h>
#include <inttypes.h>
#include <sys/types.h>
#include <algorithm>
#include <memory>
#include <string>
#include <unordered_map>
#include <utility>
#include <vector>

#include "my_inttypes.h"
#include "my_sys.h"
#include "template_utils.h"
#include "unittest/gunit/benchmark.h"
#include "unittest/gunit/strnxfrm.h"

using std::make_pair;
using std::max;
using std::pair;
using std::string;
using std::to_string;
using std::unordered_map;

namespace strnxfrm_unittest {

namespace {

// Dumps `len` bytes of `arr` to stderr as two-digit hex values, eight per
// row, and finishes with one extra newline.
void print_array(const uchar *arr, size_t len) {
  for (size_t pos = 0; pos < len; ++pos) {
    fprintf(stderr, " %02x", arr[pos]);
    // End the row after every eighth byte and after the very last byte.
    const bool row_full = (pos % 8) == 7;
    const bool last_byte = (pos + 1 == len);
    if (row_full || last_byte) fprintf(stderr, "\n");
  }
  fprintf(stderr, "\n");
}

// A function to compare two arrays and print them out in its entirety
// (for easier context) if they are not equal.
void expect_arrays_equal(const uchar *expected, const uchar *got, size_t len) {
  int num_err = 0;
  for (size_t i = 0; i < len && num_err < 5; ++i) {
    EXPECT_EQ(expected[i], got[i]);
    if (expected[i] != got[i]) ++num_err;
  }
  if (num_err) {
    fprintf(stderr, "Expected:\n");
    for (size_t i = 0; i < len; ++i) {
      fprintf(stderr, " %c%02x", expected[i] != got[i] ? '*' : ' ',
              expected[i]);
      if ((i % 8) == 7 || i == len - 1) fprintf(stderr, "\n");
    }
    fprintf(stderr, "\nGot:\n");
    for (size_t i = 0; i < len; ++i) {
      fprintf(stderr, " %c%02x", expected[i] != got[i] ? '*' : ' ', got[i]);
      if ((i % 8) == 7 || i == len - 1) fprintf(stderr, "\n");
    }
    fprintf(stderr, "\n");
  }
}

// Looks up the collation called `name` through my_collation_get_by_name(),
// using a local loader object and no flags, and returns the lookup result.
CHARSET_INFO *init_collation(const char *name) {
  MY_CHARSET_LOADER charset_loader;
  CHARSET_INFO *const collation =
      my_collation_get_by_name(&charset_loader, name, MYF(0));
  return collation;
}

// Transforms `a` and `b` with my_strnxfrm() into two 256-byte buffers and
// compares the results: a nonzero memcmp() over the shorter of the two
// transformed lengths is returned as is; otherwise the result is -1, 0 or 1
// depending on how the transformed lengths compare.
int compare_through_strxfrm(CHARSET_INFO *cs, const char *a, const char *b) {
  uchar key_a[256];
  uchar key_b[256];
  int len_a = my_strnxfrm(cs, key_a, sizeof(key_a),
                          pointer_cast<const uchar *>(a), strlen(a));
  int len_b = my_strnxfrm(cs, key_b, sizeof(key_b),
                          pointer_cast<const uchar *>(b), strlen(b));

  if (false)  // Enable this for debugging.
  {
    fprintf(stderr, "\n\nstrxfrm for '%s':\n", a);
    print_array(key_a, len_a);
    fprintf(stderr, "strxfrm for '%s':\n", b);
    print_array(key_b, len_b);
  }

  const int prefix_cmp = memcmp(key_a, key_b, std::min(len_a, len_b));
  if (prefix_cmp != 0) return prefix_cmp;

  // Common prefix is identical: the shorter key sorts first.
  if (len_a < len_b) return -1;
  return (len_a == len_b) ? 0 : 1;
}

}  // namespace

// Number of times each parameterized strnxfrm test repeats its call.
#if defined(NDEBUG)
// Set this so that each test case takes a few seconds.
// And set it back to a small value before pushing!!
// const size_t num_iterations= 20000000ULL;
const size_t num_iterations = 2ULL;
#else
// There is no point in benchmarking anything in debug mode.
const size_t num_iterations = 1ULL;
#endif

// Fixture parameterized on a buffer length. SetUp() sizes both buffers to
// that length and fills them with 0x20 bytes.
class StrnxfrmTest : public ::testing::TestWithParam<size_t> {
 protected:
  void SetUp() override {
    const size_t length = GetParam();
    m_length = length;
    m_src.assign(length, 0x20);
    m_dst.assign(length, 0x20);
  }

  std::vector<uchar> m_src;  // Source bytes handed to the transform.
  std::vector<uchar> m_dst;  // Separate destination for the *SrcDst tests.
  size_t m_length;           // The test parameter; assigned in SetUp().
};

// Buffer lengths with which every StrnxfrmTest case is instantiated.
size_t test_values[] = {1, 10, 100, 1000};

// Registers the StrnxfrmTest suite once per entry of test_values, under the
// instantiation prefix "Strnxfrm".
INSTANTIATE_TEST_SUITE_P(Strnxfrm, StrnxfrmTest,
                         ::testing::ValuesIn(test_values));

// Calls strnxfrm_orig num_iterations times, reading m_src and writing m_dst.
TEST_P(StrnxfrmTest, OriginalSrcDst) {
  CHARSET_INFO *cs = init_collation("latin1_swedish_ci");
  for (size_t remaining = num_iterations; remaining > 0; --remaining) {
    strnxfrm_orig(cs, m_dst.data(), m_length, m_length, m_src.data(), m_length,
                  192);
  }
}

// Calls strnxfrm_orig_unrolled num_iterations times, reading m_src and
// writing m_dst.
TEST_P(StrnxfrmTest, OriginalUnrolledSrcDst) {
  CHARSET_INFO *cs = init_collation("latin1_swedish_ci");
  for (size_t remaining = num_iterations; remaining > 0; --remaining) {
    strnxfrm_orig_unrolled(cs, m_dst.data(), m_length, m_length, m_src.data(),
                           m_length, 192);
  }
}

// Calls strnxfrm_new num_iterations times, reading m_src and writing m_dst.
TEST_P(StrnxfrmTest, ModifiedSrcDst) {
  CHARSET_INFO *cs = init_collation("latin1_swedish_ci");
  for (size_t remaining = num_iterations; remaining > 0; --remaining) {
    strnxfrm_new(cs, m_dst.data(), m_length, m_length, m_src.data(), m_length,
                 192);
  }
}

// Calls strnxfrm_new_unrolled num_iterations times, reading m_src and
// writing m_dst.
TEST_P(StrnxfrmTest, ModifiedUnrolledSrcDst) {
  CHARSET_INFO *cs = init_collation("latin1_swedish_ci");
  for (size_t remaining = num_iterations; remaining > 0; --remaining) {
    strnxfrm_new_unrolled(cs, m_dst.data(), m_length, m_length, m_src.data(),
                          m_length, 192);
  }
}

// Calls strnxfrm_orig num_iterations times with m_src as both the source
// and the destination buffer.
TEST_P(StrnxfrmTest, OriginalSrcSrc) {
  CHARSET_INFO *cs = init_collation("latin1_swedish_ci");
  for (size_t remaining = num_iterations; remaining > 0; --remaining) {
    strnxfrm_orig(cs, m_src.data(), m_length, m_length, m_src.data(), m_length,
                  192);
  }
}

// Calls strnxfrm_orig_unrolled num_iterations times with m_src as both the
// source and the destination buffer.
TEST_P(StrnxfrmTest, OriginalUnrolledSrcSrc) {
  CHARSET_INFO *cs = init_collation("latin1_swedish_ci");
  for (size_t remaining = num_iterations; remaining > 0; --remaining) {
    strnxfrm_orig_unrolled(cs, m_src.data(), m_length, m_length, m_src.data(),
                           m_length, 192);
  }
}

// Calls strnxfrm_new num_iterations times with m_src as both the source and
// the destination buffer.
TEST_P(StrnxfrmTest, ModifiedSrcSrc) {
  CHARSET_INFO *cs = init_collation("latin1_swedish_ci");
  for (size_t remaining = num_iterations; remaining > 0; --remaining) {
    strnxfrm_new(cs, m_src.data(), m_length, m_length, m_src.data(), m_length,
                 192);
  }
}

// Calls strnxfrm_new_unrolled num_iterations times with m_src as both the
// source and the destination buffer.
TEST_P(StrnxfrmTest, ModifiedUnrolledSrcSrc) {
  CHARSET_INFO *cs = init_collation("latin1_swedish_ci");
  for (size_t remaining = num_iterations; remaining > 0; --remaining) {
    strnxfrm_new_unrolled(cs, m_src.data(), m_length, m_length, m_src.data(),
                          m_length, 192);
  }
}

// Transforms a short mixed-script string with utf8mb3_bin at every even
// output length below 32 and compares the produced prefix against the
// recorded bytes (including the trailing pad weights).
TEST(StrXfrmTest, SimpleUTF8Correctness) {
  static const unsigned char full_answer_with_pad[32] = {
      0x00, 0x61, 0x00, 0x62, 0x00, 0x63,  // abc
      0x00, 0x20,                          // space
      0x00, 0xe6, 0x00, 0xf8, 0x00, 0xe5,  // æøå
      0x00, 0x20,                          // space
      0x65, 0xe5, 0x67, 0x2c, 0x8a, 0x9e,  // 日本語
      0x00, 0x20, 0x00, 0x20, 0x00, 0x20,
      0x00, 0x20, 0x00, 0x20  // space for padding
  };

  CHARSET_INFO *cs = init_collation("utf8mb3_bin");
  const char *src = "abc æøå 日本語";
  const uchar *src_bytes = pointer_cast<const uchar *>(src);
  const size_t src_len = strlen(src);
  unsigned char buf[32];

  size_t maxlen = 0;
  while (maxlen < sizeof(buf)) {
    // Start every round from a buffer full of 0xff.
    memset(buf, 0xff, sizeof(buf));
    my_strnxfrm(cs, buf, maxlen, src_bytes, src_len);
    expect_arrays_equal(full_answer_with_pad, buf, maxlen);
    maxlen += 2;
  }
}

// Transforms a short mixed-script string with utf8mb4_0900_ai_ci at every
// even output length below 30 and compares the produced prefix against the
// recorded bytes.
TEST(StrXfrmTest, SimpleUTF8MB4Correctness) {
  static const unsigned char full_answer_with_pad[30] = {
      0x1c, 0x47, 0x1c, 0x60, 0x1c, 0x7a,              // abc
      0x02, 0x09,                                      // space
      0x1c, 0x47, 0x1c, 0xaa, 0x1d, 0xdd, 0x1c, 0x47,  // æøå
      0x02, 0x09,                                      // space
      0xfb, 0x40, 0xe5, 0xe5, 0xfb, 0x40, 0xe7, 0x2c,
      0xfb, 0x41, 0x8a, 0x9e,  // 日本語
  };

  CHARSET_INFO *cs = init_collation("utf8mb4_0900_ai_ci");
  const char *src = "abc æøå 日本語";
  const uchar *src_bytes = pointer_cast<const uchar *>(src);
  const size_t src_len = strlen(src);
  unsigned char buf[30];

  size_t maxlen = 0;
  while (maxlen < sizeof(buf)) {
    // Start every round from a buffer full of 0xff.
    memset(buf, 0xff, sizeof(buf));
    my_strnxfrm(cs, buf, maxlen, src_bytes, src_len);
    expect_arrays_equal(full_answer_with_pad, buf, maxlen);
    maxlen += 2;
  }
}

// Transforms a short mixed-script string with utf8mb4_0900_as_ci at every
// even output length below 62 and compares the produced prefix against the
// recorded bytes, which hold two weight levels split by a 0x00 0x00
// separator.
TEST(StrXfrmTest, UTF8MB4Correctness_as_ci) {
  // NOTE(review): the per-row labels after the level separator do not line
  // up exactly with the bytes on their rows (the "space" row clearly holds
  // more than one weight); treat them as approximate.
  static const unsigned char full_answer_with_pad[62] = {
      0x1c, 0x47, 0x1c, 0x60, 0x1c, 0x7a,              // abc
      0x02, 0x09,                                      // space
      0x1c, 0x47, 0x1c, 0xaa, 0x1d, 0xdd, 0x1c, 0x47,  // æøå
      0x02, 0x09,                                      // space
      0xfb, 0x40, 0xe5, 0xe5, 0xfb, 0x40, 0xe7, 0x2c,  // 日本語
      0xfb, 0x41, 0x8a, 0x9e, 0x00, 0x00,              // level separator
      0x00, 0x20, 0x00, 0x20, 0x00, 0x20,              // abc
      0x00, 0x20,                                      // space
      0x00, 0x20, 0x01, 0x10, 0x00, 0x20, 0x00, 0x20,  // æøå
      0x00, 0x2F, 0x00, 0x20, 0x00, 0x29, 0x00, 0x20,  // space
      0x00, 0x20, 0x00, 0x20, 0x00, 0x20               // 日本語
  };

  CHARSET_INFO *cs = init_collation("utf8mb4_0900_as_ci");
  const char *src = "abc æøå 日本語";
  const uchar *src_bytes = pointer_cast<const uchar *>(src);
  const size_t src_len = strlen(src);
  unsigned char buf[62];

  size_t maxlen = 0;
  while (maxlen < sizeof(buf)) {
    // Start every round from a buffer full of 0xff.
    memset(buf, 0xff, sizeof(buf));
    my_strnxfrm(cs, buf, maxlen, src_bytes, src_len);
    expect_arrays_equal(full_answer_with_pad, buf, maxlen);
    maxlen += 2;
  }
}

// Spot checks of utf8mb4_0900_as_ci ordering, done by comparing the
// transformed forms of string pairs through compare_through_strxfrm().
TEST(StrXfrmTest, UTF8MB4Correctness_as_ci_1) {
  CHARSET_INFO *cs = init_collation("utf8mb4_0900_as_ci");
  // case insensitive: strings differing only in letter case compare equal.
  EXPECT_EQ(compare_through_strxfrm(cs, "Abc", "aBC"), 0);
  // accent sensitive: these pairs are expected to compare unequal.
  EXPECT_NE(compare_through_strxfrm(cs, "ǍḄÇ", "ÁḆĈ"), 0);
  EXPECT_NE(compare_through_strxfrm(cs, u8"\uA73A", u8"\uA738"), 0);
  // Hangul decomposition: U+AC00 and U+326E are expected to compare equal.
  EXPECT_EQ(compare_through_strxfrm(cs, u8"\uAC00", u8"\u326E"), 0);
}

// Transforms a string mixing Latin, kana, Han and Greek/Coptic/Cyrillic
// characters with utf8mb4_ja_0900_as_cs and compares the result, at every
// even output length below the table size, against the recorded three-level
// weight table below.
TEST(StrXfrmTest, JapaneseUTF8MB4) {
  CHARSET_INFO *cs = init_collation("utf8mb4_ja_0900_as_cs");

  // The input is spelled as raw UTF-8 bytes; the comments give the
  // characters each group encodes.
  const char *src =
      "\x61\x41\xCA\xAC\xCA\xAD"  // latin 'aAʬʭ'
                                  // Hiragana and Katakana 'ぁンはばぱ'
      "\xE3\x81\x81\xE3\x83\xB3\xE3\x81\xAF\xE3\x81\xB0\xE3\x81\xB1"
      // Japanese Han '亜熙憐'
      "\xE4\xBA\x9C\xE7\x86\x99\xE6\x86\x90"
      // Other Han '﨎㐀'
      "\xEF\xA8\x8E\xE3\x90\x80"
      // Greek, Coptic etc. 'αⲁаⳤퟻ'
      "\xCE\xB1\xE2\xB2\x81\xD0\xB0\xE2\xB3\xA4\xED\x9F\xBB";

  // Recorded expected output. Levels are separated by a 0x00 0x00 pair.
  // NOTE(review): the trailing row labels are approximate; several rows hold
  // weights belonging to the neighbouring group.
  static const unsigned char full_answer_with_pad[156] = {
      // Level 1
      0x1C, 0x47, 0x1C, 0x47, 0x1F, 0xB1, 0x1F, 0xB5,  // latin
      0x1F, 0xB6, 0x1F, 0xE7, 0x1F, 0xD0, 0x1F, 0xD0,  // Hiragana and Katakana
      0x1F, 0xD0, 0x54, 0xA4, 0x6D, 0x76, 0x60, 0x00,  // Japanese Han
      0xFB, 0x41, 0xFA, 0x0E, 0xFB, 0x80, 0xB4, 0x00,  // Other Han
      0xFB, 0x86, 0x1F, 0xB9, 0xFB, 0x86, 0x1F, 0xE6,  // Greek, Coptic etc.
      0xFB, 0x86, 0x20, 0x22, 0xFB, 0x86, 0x1F, 0xF1, 0xFB, 0x86, 0x1F, 0xE6,
      0xFB, 0x86, 0x1F, 0xF0, 0xFB, 0x86, 0x3D, 0x59, 0x00,
      0x00,                                            // Level separator
                                                       // Level 2
      0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20,  // latin
      0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20,  // Hiragana and Katakana
      0x00, 0x37, 0x00, 0x20, 0x00, 0x38, 0x00, 0x20, 0x00, 0x20, 0x00,
      0x20,                                            // Japanese Han
      0x00, 0x20, 0x00, 0x20,                          // Other Han
      0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20,  // Greek, Coptic etc.
      0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x00,  // Level separator
                                                       // Level 3
      0x00, 0x02, 0x00, 0x08, 0x00, 0x02, 0x00, 0x02,  // latin
      0x00, 0x0D, 0x00, 0x0E, 0x00, 0x0E, 0x00, 0x0E,  // Hiragana and Katakana
      0x00, 0x02, 0x00, 0x0E, 0x00, 0x02, 0x00, 0x02, 0x00, 0x02, 0x00,
      0x02,                                            // Japanese Han
      0x00, 0x02, 0x00, 0x02,                          // Other Han
      0x00, 0x02, 0x00, 0x02, 0x00, 0x02, 0x00, 0x04,  // Greek, Coptic etc.
      0x00, 0x04, 0x00, 0x04, 0x00, 0x02};

  unsigned char buf[sizeof(full_answer_with_pad)];
  // Try every even output length below the table size; only the first
  // maxlen bytes are compared each time.
  for (size_t maxlen = 0; maxlen < sizeof(buf); maxlen += 2) {
    // Reset the buffer to 0xff before each transform.
    memset(buf, 0xff, sizeof(buf));
    my_strnxfrm(cs, buf, maxlen, pointer_cast<const uchar *>(src), strlen(src));
    expect_arrays_equal(full_answer_with_pad, buf, maxlen);
  }
}

// Checks utf8mb4_ja_0900_as_cs_ks in two ways: first the exact transformed
// bytes of a mixed-script string against a recorded four-level table, then
// the relative order of Hiragana/Katakana string pairs compared with the
// same pairs under utf8mb4_ja_0900_as_cs.
TEST(StrXfrmTest, Japanese_ks_UTF8MB4) {
  CHARSET_INFO *cs = init_collation("utf8mb4_ja_0900_as_cs_ks");

  /*
    Weights of Japanese Han, Other Han, Greek, Coptic are not changed compared
    to the test result of collation utf8mb4_ja_0900_as_cs (in test
    JapaneseUTF8MB4 above). But an additional quaternary weight is added for
    Hiragana and Katakana characters.
  */
  const char *src =
      "\x61\x41\xCA\xAC\xCA\xAD"  // latin 'aAʬʭ'
                                  // Hiragana and Katakana 'ぁンはばぱ'
      "\xE3\x81\x81\xE3\x83\xB3\xE3\x81\xAF\xE3\x81\xB0\xE3\x81\xB1"
      // Japanese Han '亜熙憐'
      "\xE4\xBA\x9C\xE7\x86\x99\xE6\x86\x90"
      // Other Han '﨎㐀'
      "\xEF\xA8\x8E\xE3\x90\x80"
      // Greek, Coptic etc. 'αⲁаⳤퟻ'
      "\xCE\xB1\xE2\xB2\x81\xD0\xB0\xE2\xB3\xA4\xED\x9F\xBB"
      // Prefix context 'さー' and 'サー'
      "\xE3\x81\x95\xE3\x83\xBC\xE3\x82\xB5\xE3\x83\xBC";

  // Recorded expected output. Levels are separated by a 0x00 0x00 pair.
  // NOTE(review): the trailing row labels are approximate; several rows hold
  // weights belonging to the neighbouring group.
  static const unsigned char full_answer_with_pad[] = {
      // Level 1
      0x1C, 0x47, 0x1C, 0x47, 0x1F, 0xB1, 0x1F, 0xB5,  // latin
      0x1F, 0xB6, 0x1F, 0xE7, 0x1F, 0xD0, 0x1F, 0xD0,  // Hiragana and Katakana
      0x1F, 0xD0, 0x54, 0xA4, 0x6D, 0x76, 0x60, 0x00,  // Japanese Han
      0xFB, 0x41, 0xFA, 0x0E, 0xFB, 0x80, 0xB4, 0x00,  // Other Han
      0xFB, 0x86, 0x1F, 0xB9, 0xFB, 0x86, 0x1F, 0xE6,  // Greek, Coptic etc.
      0xFB, 0x86, 0x20, 0x22, 0xFB, 0x86, 0x1F, 0xF1, 0xFB, 0x86, 0x1F, 0xE6,
      0xFB, 0x86, 0x1F, 0xF0, 0xFB, 0x86, 0x3D, 0x59, 0x1F, 0xC1, 0x1F, 0xB6,
      0x1F, 0xC1, 0x1F, 0xB6,                          // Prefix context
      0x00, 0x00,                                      // Level separator
                                                       // Level 2
      0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20,  // latin
      0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20,  // Hiragana and Katakana
      0x00, 0x37, 0x00, 0x20, 0x00, 0x38, 0x00, 0x20, 0x00, 0x20, 0x00,
      0x20,                                            // Japanese Han
      0x00, 0x20, 0x00, 0x20,                          // Other Han
      0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20,  // Greek, Coptic etc.
      0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20,
      0x00, 0x20,                                      // Prefix context
      0x00, 0x00,                                      // Level separator
                                                       // Level 3
      0x00, 0x02, 0x00, 0x08, 0x00, 0x02, 0x00, 0x02,  // latin
      0x00, 0x0D, 0x00, 0x0E, 0x00, 0x0E, 0x00, 0x0E,  // Hiragana and Katakana
      0x00, 0x02, 0x00, 0x0E, 0x00, 0x02, 0x00, 0x02, 0x00, 0x02, 0x00,
      0x02,                                            // Japanese Han
      0x00, 0x02, 0x00, 0x02,                          // Other Han
      0x00, 0x02, 0x00, 0x02, 0x00, 0x02, 0x00, 0x04,  // Greek, Coptic etc.
      0x00, 0x04, 0x00, 0x04, 0x00, 0x02, 0x00, 0x0E, 0x00, 0x0C, 0x00, 0x21,
      0x00, 0x0E,                                      // Prefix context
      0x00, 0x0C, 0x00, 0x21, 0x00, 0x00,              // Level separator
                                                       // Level 4
      0x00, 0x02, 0x00, 0x08, 0x00, 0x02, 0x00, 0x02,  // Hiragana and Katakana
      0x00, 0x02, 0x00, 0x02, 0x00, 0x02, 0x00, 0x08, 0x00,
      0x08  // Prefix context
  };

  unsigned char buf[sizeof(full_answer_with_pad)];
  // Try every even output length below the table size; only the first
  // maxlen bytes are compared each time.
  for (size_t maxlen = 0; maxlen < sizeof(buf); maxlen += 2) {
    // Reset the buffer to 0xff before each transform.
    memset(buf, 0xff, sizeof(buf));
    my_strnxfrm(cs, buf, maxlen, pointer_cast<const uchar *>(src), strlen(src));
    expect_arrays_equal(full_answer_with_pad, buf, maxlen);
  }

  // Second part: the same string pairs must compare equal under as_cs but
  // strictly ordered under as_cs_ks.
  CHARSET_INFO *as_cs = init_collation("utf8mb4_ja_0900_as_cs");
  CHARSET_INFO *as_cs_ks = init_collation("utf8mb4_ja_0900_as_cs_ks");
  // utf8 "にほんご"
  const char *str1 = "\xE3\x81\xAB\xE3\x81\xBB\xE3\x82\x93\xE3\x81\x94";
  // utf8 "ニホンゴ"
  const char *str2 = "\xE3\x83\x8B\xE3\x83\x9B\xE3\x83\xB3\xE3\x82\xB4";
  EXPECT_EQ(compare_through_strxfrm(as_cs, str1, str2), 0);
  EXPECT_LT(compare_through_strxfrm(as_cs_ks, str1, str2), 0);

  str1 = "\xE3\x81\xAF\xE3\x81\xAF";              // utf8 "はは"
  str2 = "\xE3\x81\xAF\xE3\x83\x8F";              // utf8 "はハ"
  const char *str3 = "\xE3\x83\x8F\xE3\x81\xAF";  // utf8 "ハは"
  const char *str4 = "\xE3\x83\x8F\xE3\x83\x8F";  // utf8 "ハハ"
  EXPECT_EQ(compare_through_strxfrm(as_cs, str1, str2), 0);
  EXPECT_EQ(compare_through_strxfrm(as_cs, str2, str3), 0);
  EXPECT_EQ(compare_through_strxfrm(as_cs, str3, str4), 0);
  EXPECT_LT(compare_through_strxfrm(as_cs_ks, str1, str2), 0);
  EXPECT_LT(compare_through_strxfrm(as_cs_ks, str2, str3), 0);
  EXPECT_LT(compare_through_strxfrm(as_cs_ks, str3, str4), 0);
}

// Checks the exact utf8mb4_ja_0900_as_cs transform of Hiragana HE (bytes
// E3 81 B8) followed by each of two different marks.
TEST(StrXfrmTest, JapaneseUTF8MB4_1) {
  CHARSET_INFO *cs = init_collation("utf8mb4_ja_0900_as_cs");
  // Japanese HE followed with Handakuten mark
  // NOTE(review): the trailing bytes E3 82 99 decode to U+3099, which Unicode
  // names the combining *voiced* sound mark (dakuten); the handakuten is
  // U+309A. Confirm which mark is intended.
  const char *src1 = "\xE3\x81\xB8\xE3\x82\x99";
  // Japanese HE followed with voiced length mark
  // NOTE(review): the trailing bytes E3 82 9E decode to U+309E, which Unicode
  // names the Hiragana voiced iteration mark. Confirm the wording.
  const char *src2 = "\xE3\x81\xB8\xE3\x82\x9E";

  /*
    When the voiced length mark is after 'HE', it should sort before
    'HE followed with Handakuten mark' on tertiary level.
  */
  static const unsigned char answer1[] = {0x1F, 0xD3, 0x00, 0x00, 0x00,
                                          0x20, 0x00, 0x37, 0x00, 0x00,
                                          0x00, 0x0E, 0x00, 0x02};
  static const unsigned char answer2[] = {
      0x1F, 0xD3, 0x1F, 0xD3, 0x00, 0x00, 0x00, 0x20, 0x00, 0x20, 0x00,
      0x37, 0x00, 0x00, 0x00, 0x0E, 0x00, 0x0E, 0x00, 0x01, 0x00, 0x21};

  // Large enough for either expected answer (14 and 22 bytes).
  unsigned char buf[32];

  // Each transform is limited to exactly the length of its expected answer.
  size_t buf_len = sizeof(answer1);
  memset(buf, 0xff, sizeof(buf));
  my_strnxfrm(cs, buf, buf_len, pointer_cast<const uchar *>(src1),
              strlen(src1));
  expect_arrays_equal(answer1, buf, buf_len);

  buf_len = sizeof(answer2);
  memset(buf, 0xff, sizeof(buf));
  my_strnxfrm(cs, buf, buf_len, pointer_cast<const uchar *>(src2),
              strlen(src2));
  expect_arrays_equal(answer2, buf, buf_len);
}

// Transforms "abc" followed by four spaces with utf8mb4_0900_as_cs at every
// even output length below 46 and compares the produced prefix against the
// recorded three-level weights. Each round is tagged with a SCOPED_TRACE so
// a failure reports the length that was being tried.
TEST(StrXfrmTest, UTF8MB4PadCorrectness) {
  // The array is declared with 52 elements; the entries beyond the listed
  // initializers are zero and are never compared, since maxlen stays below
  // sizeof(buf).
  static const unsigned char full_answer[52] = {
      0x1c, 0x47, 0x1c, 0x60, 0x1c, 0x7a,  // abc
      0x02, 0x09, 0x02, 0x09, 0x02, 0x09,
      0x02, 0x09,                          // Four spaces.
      0x00, 0x00,                          // Level separator.
      0x00, 0x20, 0x00, 0x20, 0x00, 0x20,  // Accents for abc.
      0x00, 0x20, 0x00, 0x20, 0x00, 0x20,
      0x00, 0x20,                          // Accents for four spaces.
      0x00, 0x00,                          // Level separator.
      0x00, 0x02, 0x00, 0x02, 0x00, 0x02,  // Case for abc.
      0x00, 0x02, 0x00, 0x02, 0x00, 0x02,
      0x00, 0x02,  // Case for four spaces.
  };

  CHARSET_INFO *cs = init_collation("utf8mb4_0900_as_cs");
  const char *src = "abc    ";
  const uchar *src_bytes = pointer_cast<const uchar *>(src);
  const size_t src_len = strlen(src);
  unsigned char buf[46];

  size_t maxlen = 0;
  while (maxlen < sizeof(buf)) {
    SCOPED_TRACE("maxlen=" + to_string(maxlen) + "/" + to_string(sizeof(buf)));
    // Start every round from a buffer full of 0xff.
    memset(buf, 0xff, sizeof(buf));
    my_strnxfrm(cs, buf, maxlen, src_bytes, src_len);
    expect_arrays_equal(full_answer, buf, maxlen);
    maxlen += 2;
  }
}

// Calls the collation's strnxfrm handler directly with a null source of
// length zero and MY_STRXFRM_PAD_TO_MAXLEN, and expects all 256 output bytes
// (pre-filled with 0x33) to come back as zero.
TEST(StrXfrmTest, NullPointer) {
  CHARSET_INFO *cs = init_collation("utf8mb4_0900_ai_ci");

  unsigned char buf[256];
  const size_t buf_size = sizeof(buf);
  memset(buf, 0x33, buf_size);

  cs->coll->strnxfrm(cs, buf, buf_size, buf_size, nullptr, 0,
                     MY_STRXFRM_PAD_TO_MAXLEN);

  size_t i = 0;
  while (i < buf_size) {
    EXPECT_EQ(0, buf[i]);
    ++i;
  }
}

// Benchmark derived from the reduced test case in Bug #83247 / #24788778.
//
// Note: no real multibyte characters are exercised here; the work is mostly
// padding. If the test string is changed to contain e.g. Japanese
// characters, performance goes down by ~20%.
static void BM_SimpleUTF8(size_t num_iterations) {
  StopBenchmarkTiming();

  CHARSET_INFO *cs = init_collation("utf8mb3_bin");

  // Key layout: twelve columns of 80 characters, three bytes per character.
  static constexpr int key_cols = 12;
  static constexpr int set_key_cols = 6;  // Only the first half is set.
  static constexpr int key_col_chars = 80;
  static constexpr int bytes_per_char = 3;
  static constexpr int key_bytes = key_col_chars * bytes_per_char;
  static constexpr int buffer_bytes = key_cols * key_bytes;

  unsigned char source[buffer_bytes];
  unsigned char dest[buffer_bytes];

  const char *content = "PolyFilla27773";
  const int len = strlen(content);

  // Zero everything, then place the content at the start of each set column.
  memset(source, 0, sizeof(source));
  for (int col = 0; col < set_key_cols; ++col) {
    memcpy(source + col * key_bytes, content, len);
  }

  StartBenchmarkTiming();
  for (size_t iter = 0; iter < num_iterations; ++iter) {
    for (int col = 0, pos = 0; col < key_cols; ++col, pos += key_bytes) {
      if (col < set_key_cols) {
        my_strnxfrm(cs, dest + pos, key_bytes, source + pos, len);
      } else {
        // Unset columns are transformed as empty strings.
        my_strnxfrm(cs, dest + pos, key_bytes, source + pos, 0);
      }
    }
  }
  StopBenchmarkTiming();
}
BENCHMARK(BM_SimpleUTF8)

// Benchmarks using my_charpos to find the length of a string.
// hp_hash.c does this extensively. Not really a strnxfrm benchmark,
// but belongs to the same optimization effort.
//
// num_iterations is how many times my_charpos is called inside the timed
// region.
static void BM_UTF8MB4StringLength(size_t num_iterations) {
  StopBenchmarkTiming();

  CHARSET_INFO *cs = init_collation("utf8mb4_0900_ai_ci");

  // Some English text, then some Norwegian text, then some Japanese,
  // and then a few emoji (the last with skin tone modifiers).
  const char *content =
      "Premature optimization is the root of all evil. "
      "Våre norske tegn bør æres. 日本語が少しわかります。 "
      "✌️🐶👩🏽";
  const int len = strlen(content);
  // Accumulate in size_t: the sum grows with every iteration and the
  // iteration count is an arbitrary size_t, so an int accumulator could be
  // pushed past INT_MAX and wrap.
  size_t tot_len = 0;

  StartBenchmarkTiming();
  for (size_t i = 0; i < num_iterations; ++i) {
    tot_len += my_charpos(cs, content, content + len, len / cs->mbmaxlen);
  }
  StopBenchmarkTiming();

  // Consume the accumulated value so the loop's result is actually used.
  EXPECT_NE(size_t{0}, tot_len);
  SetBytesProcessed(num_iterations * strlen(content));
}
BENCHMARK(BM_UTF8MB4StringLength)

// Benchmark testing the default recommended collation for 8.0, without
// stressing padding as much, but still testing only Latin letters.
//
// Transforms one ASCII-only string num_iterations times into a buffer sized
// exactly like the recorded expected output, then checks the result of the
// final transform against that recording.
static void BM_SimpleUTF8MB4(size_t num_iterations) {
  // Keep setup out of the timed region (per the helper names).
  StopBenchmarkTiming();

  CHARSET_INFO *cs = init_collation("utf8mb4_0900_ai_ci");

  const char *content =
      "This is a rather long string that contains only "
      "simple letters that are available in ASCII. This is a common special "
      "case that warrants a benchmark on its own, even if the character set "
      "and collation supports much more complicated scenarios.";
  const int len = strlen(content);

  // Just recorded from a trial run on the string above.
  static constexpr uchar expected[] = {
      0x1e, 0x95, 0x1d, 0x18, 0x1d, 0x32, 0x1e, 0x71, 0x02, 0x09, 0x1d, 0x32,
      0x1e, 0x71, 0x02, 0x09, 0x1c, 0x47, 0x02, 0x09, 0x1e, 0x33, 0x1c, 0x47,
      0x1e, 0x95, 0x1d, 0x18, 0x1c, 0xaa, 0x1e, 0x33, 0x02, 0x09, 0x1d, 0x77,
      0x1d, 0xdd, 0x1d, 0xb9, 0x1c, 0xf4, 0x02, 0x09, 0x1e, 0x71, 0x1e, 0x95,
      0x1e, 0x33, 0x1d, 0x32, 0x1d, 0xb9, 0x1c, 0xf4, 0x02, 0x09, 0x1e, 0x95,
      0x1d, 0x18, 0x1c, 0x47, 0x1e, 0x95, 0x02, 0x09, 0x1c, 0x7a, 0x1d, 0xdd,
      0x1d, 0xb9, 0x1e, 0x95, 0x1c, 0x47, 0x1d, 0x32, 0x1d, 0xb9, 0x1e, 0x71,
      0x02, 0x09, 0x1d, 0xdd, 0x1d, 0xb9, 0x1d, 0x77, 0x1f, 0x0b, 0x02, 0x09,
      0x1e, 0x71, 0x1d, 0x32, 0x1d, 0xaa, 0x1e, 0x0c, 0x1d, 0x77, 0x1c, 0xaa,
      0x02, 0x09, 0x1d, 0x77, 0x1c, 0xaa, 0x1e, 0x95, 0x1e, 0x95, 0x1c, 0xaa,
      0x1e, 0x33, 0x1e, 0x71, 0x02, 0x09, 0x1e, 0x95, 0x1d, 0x18, 0x1c, 0x47,
      0x1e, 0x95, 0x02, 0x09, 0x1c, 0x47, 0x1e, 0x33, 0x1c, 0xaa, 0x02, 0x09,
      0x1c, 0x47, 0x1e, 0xe3, 0x1c, 0x47, 0x1d, 0x32, 0x1d, 0x77, 0x1c, 0x47,
      0x1c, 0x60, 0x1d, 0x77, 0x1c, 0xaa, 0x02, 0x09, 0x1d, 0x32, 0x1d, 0xb9,
      0x02, 0x09, 0x1c, 0x47, 0x1e, 0x71, 0x1c, 0x7a, 0x1d, 0x32, 0x1d, 0x32,
      0x02, 0x77, 0x02, 0x09, 0x1e, 0x95, 0x1d, 0x18, 0x1d, 0x32, 0x1e, 0x71,
      0x02, 0x09, 0x1d, 0x32, 0x1e, 0x71, 0x02, 0x09, 0x1c, 0x47, 0x02, 0x09,
      0x1c, 0x7a, 0x1d, 0xdd, 0x1d, 0xaa, 0x1d, 0xaa, 0x1d, 0xdd, 0x1d, 0xb9,
      0x02, 0x09, 0x1e, 0x71, 0x1e, 0x0c, 0x1c, 0xaa, 0x1c, 0x7a, 0x1d, 0x32,
      0x1c, 0x47, 0x1d, 0x77, 0x02, 0x09, 0x1c, 0x7a, 0x1c, 0x47, 0x1e, 0x71,
      0x1c, 0xaa, 0x02, 0x09, 0x1e, 0x95, 0x1d, 0x18, 0x1c, 0x47, 0x1e, 0x95,
      0x02, 0x09, 0x1e, 0xf5, 0x1c, 0x47, 0x1e, 0x33, 0x1e, 0x33, 0x1c, 0x47,
      0x1d, 0xb9, 0x1e, 0x95, 0x1e, 0x71, 0x02, 0x09, 0x1c, 0x47, 0x02, 0x09,
      0x1c, 0x60, 0x1c, 0xaa, 0x1d, 0xb9, 0x1c, 0x7a, 0x1d, 0x18, 0x1d, 0xaa,
      0x1c, 0x47, 0x1e, 0x33, 0x1d, 0x65, 0x02, 0x09, 0x1d, 0xdd, 0x1d, 0xb9,
      0x02, 0x09, 0x1d, 0x32, 0x1e, 0x95, 0x1e, 0x71, 0x02, 0x09, 0x1d, 0xdd,
      0x1e, 0xf5, 0x1d, 0xb9, 0x02, 0x22, 0x02, 0x09, 0x1c, 0xaa, 0x1e, 0xe3,
      0x1c, 0xaa, 0x1d, 0xb9, 0x02, 0x09, 0x1d, 0x32, 0x1c, 0xe5, 0x02, 0x09,
      0x1e, 0x95, 0x1d, 0x18, 0x1c, 0xaa, 0x02, 0x09, 0x1c, 0x7a, 0x1d, 0x18,
      0x1c, 0x47, 0x1e, 0x33, 0x1c, 0x47, 0x1c, 0x7a, 0x1e, 0x95, 0x1c, 0xaa,
      0x1e, 0x33, 0x02, 0x09, 0x1e, 0x71, 0x1c, 0xaa, 0x1e, 0x95, 0x02, 0x09,
      0x1c, 0x47, 0x1d, 0xb9, 0x1c, 0x8f, 0x02, 0x09, 0x1c, 0x7a, 0x1d, 0xdd,
      0x1d, 0x77, 0x1d, 0x77, 0x1c, 0x47, 0x1e, 0x95, 0x1d, 0x32, 0x1d, 0xdd,
      0x1d, 0xb9, 0x02, 0x09, 0x1e, 0x71, 0x1e, 0xb5, 0x1e, 0x0c, 0x1e, 0x0c,
      0x1d, 0xdd, 0x1e, 0x33, 0x1e, 0x95, 0x1e, 0x71, 0x02, 0x09, 0x1d, 0xaa,
      0x1e, 0xb5, 0x1c, 0x7a, 0x1d, 0x18, 0x02, 0x09, 0x1d, 0xaa, 0x1d, 0xdd,
      0x1e, 0x33, 0x1c, 0xaa, 0x02, 0x09, 0x1c, 0x7a, 0x1d, 0xdd, 0x1d, 0xaa,
      0x1e, 0x0c, 0x1d, 0x77, 0x1d, 0x32, 0x1c, 0x7a, 0x1c, 0x47, 0x1e, 0x95,
      0x1c, 0xaa, 0x1c, 0x8f, 0x02, 0x09, 0x1e, 0x71, 0x1c, 0x7a, 0x1c, 0xaa,
      0x1d, 0xb9, 0x1c, 0x47, 0x1e, 0x33, 0x1d, 0x32, 0x1d, 0xdd, 0x1e, 0x71,
      0x02, 0x77};
  // The output buffer has exactly the size of the recorded answer.
  uchar dest[sizeof(expected)];

  StartBenchmarkTiming();
  for (size_t i = 0; i < num_iterations; ++i) {
    my_strnxfrm(cs, dest, sizeof(dest),
                reinterpret_cast<const uchar *>(content), len);
  }
  StopBenchmarkTiming();

  // Every iteration writes the same buffer, so this checks the last result.
  expect_arrays_equal(expected, dest, sizeof(dest));

  SetBytesProcessed(num_iterations * strlen(content));
}
BENCHMARK(BM_SimpleUTF8MB4)

// Benchmark testing a wider variety of character sets on a more complicated
// collation (the recommended default collation for 8.0), without stressing
// padding as much.
//
// Transforms one mixed-script string num_iterations times into a buffer
// sized exactly like the recorded expected output, then checks the result
// of the final transform against that recording.
static void BM_MixedUTF8MB4(size_t num_iterations) {
  // Keep setup out of the timed region (per the helper names).
  StopBenchmarkTiming();

  CHARSET_INFO *cs = init_collation("utf8mb4_0900_ai_ci");

  // Some English text, then some Norwegian text, then some Japanese,
  // and then a few emoji (the last with skin tone modifiers).
  const char *content =
      "Premature optimization is the root of all evil. "
      "Våre norske tegn bør æres. 日本語が少しわかります。 "
      "✌️🐶👩🏽";
  const int len = strlen(content);

  // Just recorded from a trial run on the string above.
  static constexpr uchar expected[] = {
      0x1e, 0x0c, 0x1e, 0x33, 0x1c, 0xaa, 0x1d, 0xaa, 0x1c, 0x47, 0x1e, 0x95,
      0x1e, 0xb5, 0x1e, 0x33, 0x1c, 0xaa, 0x02, 0x09, 0x1d, 0xdd, 0x1e, 0x0c,
      0x1e, 0x95, 0x1d, 0x32, 0x1d, 0xaa, 0x1d, 0x32, 0x1f, 0x21, 0x1c, 0x47,
      0x1e, 0x95, 0x1d, 0x32, 0x1d, 0xdd, 0x1d, 0xb9, 0x02, 0x09, 0x1d, 0x32,
      0x1e, 0x71, 0x02, 0x09, 0x1e, 0x95, 0x1d, 0x18, 0x1c, 0xaa, 0x02, 0x09,
      0x1e, 0x33, 0x1d, 0xdd, 0x1d, 0xdd, 0x1e, 0x95, 0x02, 0x09, 0x1d, 0xdd,
      0x1c, 0xe5, 0x02, 0x09, 0x1c, 0x47, 0x1d, 0x77, 0x1d, 0x77, 0x02, 0x09,
      0x1c, 0xaa, 0x1e, 0xe3, 0x1d, 0x32, 0x1d, 0x77, 0x02, 0x77, 0x02, 0x09,
      0x1e, 0xe3, 0x1c, 0x47, 0x1e, 0x33, 0x1c, 0xaa, 0x02, 0x09, 0x1d, 0xb9,
      0x1d, 0xdd, 0x1e, 0x33, 0x1e, 0x71, 0x1d, 0x65, 0x1c, 0xaa, 0x02, 0x09,
      0x1e, 0x95, 0x1c, 0xaa, 0x1c, 0xf4, 0x1d, 0xb9, 0x02, 0x09, 0x1c, 0x60,
      0x1d, 0xdd, 0x1e, 0x33, 0x02, 0x09, 0x1c, 0x47, 0x1c, 0xaa, 0x1e, 0x33,
      0x1c, 0xaa, 0x1e, 0x71, 0x02, 0x77, 0x02, 0x09, 0xfb, 0x40, 0xe5, 0xe5,
      0xfb, 0x40, 0xe7, 0x2c, 0xfb, 0x41, 0x8a, 0x9e, 0x3d, 0x60, 0xfb, 0x40,
      0xdc, 0x11, 0x3d, 0x66, 0x3d, 0x87, 0x3d, 0x60, 0x3d, 0x83, 0x3d, 0x79,
      0x3d, 0x67, 0x02, 0x8a, 0x02, 0x09, 0x0a, 0x2d, 0x13, 0xdf, 0x14, 0x12,
      0x13, 0xa6};
  // The output buffer has exactly the size of the recorded answer.
  uchar dest[sizeof(expected)];

  StartBenchmarkTiming();
  for (size_t i = 0; i < num_iterations; ++i) {
    my_strnxfrm(cs, dest, sizeof(dest),
                reinterpret_cast<const uchar *>(content), len);
  }
  StopBenchmarkTiming();

  // Every iteration writes the same buffer, so this checks the last result.
  expect_arrays_equal(expected, dest, sizeof(dest));
  SetBytesProcessed(num_iterations * strlen(content));
}
BENCHMARK(BM_MixedUTF8MB4)

// Benchmark transforming a mixed-script string with the utf8mb4_0900_as_ci
// collation.
//
// The string is transformed num_iterations times into a buffer sized exactly
// like the recorded expected output, and the result of the final transform
// is checked against that recording.
static void BM_MixedUTF8MB4_AS_CI(size_t num_iterations) {
  // Keep setup out of the timed region (per the helper names).
  StopBenchmarkTiming();

  CHARSET_INFO *cs = init_collation("utf8mb4_0900_as_ci");

  // Some English text, then some Norwegian text, then some Japanese,
  // and then a few emoji (the last with skin tone modifiers).
  const char *content =
      "Premature optimization is the root of all evil. "
      "Våre norske tegn bør æres. 日本語が少しわかります。 "
      "✌️🐶👩🏽";
  const int len = strlen(content);

  // Just recorded from a trial run on the string above.
  // NOTE(review): the 0x00, 0x00 pair after 0x13, 0xa6 is presumably the
  // level separator, followed by the second-level weights - confirm.
  static constexpr uchar expected[] = {
      0x1e, 0x0c, 0x1e, 0x33, 0x1c, 0xaa, 0x1d, 0xaa, 0x1c, 0x47, 0x1e, 0x95,
      0x1e, 0xb5, 0x1e, 0x33, 0x1c, 0xaa, 0x02, 0x09, 0x1d, 0xdd, 0x1e, 0x0c,
      0x1e, 0x95, 0x1d, 0x32, 0x1d, 0xaa, 0x1d, 0x32, 0x1f, 0x21, 0x1c, 0x47,
      0x1e, 0x95, 0x1d, 0x32, 0x1d, 0xdd, 0x1d, 0xb9, 0x02, 0x09, 0x1d, 0x32,
      0x1e, 0x71, 0x02, 0x09, 0x1e, 0x95, 0x1d, 0x18, 0x1c, 0xaa, 0x02, 0x09,
      0x1e, 0x33, 0x1d, 0xdd, 0x1d, 0xdd, 0x1e, 0x95, 0x02, 0x09, 0x1d, 0xdd,
      0x1c, 0xe5, 0x02, 0x09, 0x1c, 0x47, 0x1d, 0x77, 0x1d, 0x77, 0x02, 0x09,
      0x1c, 0xaa, 0x1e, 0xe3, 0x1d, 0x32, 0x1d, 0x77, 0x02, 0x77, 0x02, 0x09,
      0x1e, 0xe3, 0x1c, 0x47, 0x1e, 0x33, 0x1c, 0xaa, 0x02, 0x09, 0x1d, 0xb9,
      0x1d, 0xdd, 0x1e, 0x33, 0x1e, 0x71, 0x1d, 0x65, 0x1c, 0xaa, 0x02, 0x09,
      0x1e, 0x95, 0x1c, 0xaa, 0x1c, 0xf4, 0x1d, 0xb9, 0x02, 0x09, 0x1c, 0x60,
      0x1d, 0xdd, 0x1e, 0x33, 0x02, 0x09, 0x1c, 0x47, 0x1c, 0xaa, 0x1e, 0x33,
      0x1c, 0xaa, 0x1e, 0x71, 0x02, 0x77, 0x02, 0x09, 0xfb, 0x40, 0xe5, 0xe5,
      0xfb, 0x40, 0xe7, 0x2c, 0xfb, 0x41, 0x8a, 0x9e, 0x3d, 0x60, 0xfb, 0x40,
      0xdc, 0x11, 0x3d, 0x66, 0x3d, 0x87, 0x3d, 0x60, 0x3d, 0x83, 0x3d, 0x79,
      0x3d, 0x67, 0x02, 0x8a, 0x02, 0x09, 0x0a, 0x2d, 0x13, 0xdf, 0x14, 0x12,
      0x13, 0xa6, 0x00, 0x00, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20,
      0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20,
      0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20,
      0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20,
      0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20,
      0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20,
      0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20,
      0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20,
      0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x29, 0x00, 0x20,
      0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20,
      0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20,
      0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x2F, 0x00, 0x20,
      0x00, 0x20, 0x00, 0x20, 0x01, 0x10, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20,
      0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20,
      0x00, 0x20, 0x00, 0x37, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20,
      0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20,
      0x00, 0x20, 0x00, 0x20, 0x00, 0x20};
  // The output buffer has exactly the size of the recorded answer.
  uchar dest[sizeof(expected)];

  StartBenchmarkTiming();
  for (size_t i = 0; i < num_iterations; ++i) {
    my_strnxfrm(cs, dest, sizeof(dest),
                reinterpret_cast<const uchar *>(content), len);
  }
  StopBenchmarkTiming();

  // Every iteration writes the same buffer, so this checks the last result.
  expect_arrays_equal(expected, dest, sizeof(dest));
  SetBytesProcessed(num_iterations * strlen(content));
}
BENCHMARK(BM_MixedUTF8MB4_AS_CI)

// Case-sensitive, accent-sensitive benchmark on the mixed-script string of the
// BM_MixedUTF8MB4 family (English, Norwegian, Japanese and emoji; see the
// content below). This will naturally be slower, since many more weights
// need to be generated: the recorded sort key carries primary, secondary and
// tertiary levels.
//
// num_iterations is the number of times the timed loop calls my_strnxfrm().
static void BM_MixedUTF8MB4_AS_CS(size_t num_iterations) {
  StopBenchmarkTiming();

  CHARSET_INFO *cs = init_collation("utf8mb4_0900_as_cs");

  // Some English text, then some Norwegian text, then some Japanese,
  // and then a few emoji (the last with skin tone modifiers).
  const char *content =
      "Premature optimization is the root of all evil. "
      "Våre norske tegn bør æres. 日本語が少しわかります。 "
      "✌️🐶👩🏽";
  // Keep strlen()'s size_t result as-is instead of narrowing it to int.
  const size_t len = strlen(content);

  // Just recorded from a trial run on the string above.
  static constexpr uchar expected[] = {
      // Primary weights.
      0x1e,
      0x0c,
      0x1e,
      0x33,
      0x1c,
      0xaa,
      0x1d,
      0xaa,
      0x1c,
      0x47,
      0x1e,
      0x95,
      0x1e,
      0xb5,
      0x1e,
      0x33,
      0x1c,
      0xaa,
      0x02,
      0x09,
      0x1d,
      0xdd,
      0x1e,
      0x0c,
      0x1e,
      0x95,
      0x1d,
      0x32,
      0x1d,
      0xaa,
      0x1d,
      0x32,
      0x1f,
      0x21,
      0x1c,
      0x47,
      0x1e,
      0x95,
      0x1d,
      0x32,
      0x1d,
      0xdd,
      0x1d,
      0xb9,
      0x02,
      0x09,
      0x1d,
      0x32,
      0x1e,
      0x71,
      0x02,
      0x09,
      0x1e,
      0x95,
      0x1d,
      0x18,
      0x1c,
      0xaa,
      0x02,
      0x09,
      0x1e,
      0x33,
      0x1d,
      0xdd,
      0x1d,
      0xdd,
      0x1e,
      0x95,
      0x02,
      0x09,
      0x1d,
      0xdd,
      0x1c,
      0xe5,
      0x02,
      0x09,
      0x1c,
      0x47,
      0x1d,
      0x77,
      0x1d,
      0x77,
      0x02,
      0x09,
      0x1c,
      0xaa,
      0x1e,
      0xe3,
      0x1d,
      0x32,
      0x1d,
      0x77,
      0x02,
      0x77,
      0x02,
      0x09,
      0x1e,
      0xe3,
      0x1c,
      0x47,
      0x1e,
      0x33,
      0x1c,
      0xaa,
      0x02,
      0x09,
      0x1d,
      0xb9,
      0x1d,
      0xdd,
      0x1e,
      0x33,
      0x1e,
      0x71,
      0x1d,
      0x65,
      0x1c,
      0xaa,
      0x02,
      0x09,
      0x1e,
      0x95,
      0x1c,
      0xaa,
      0x1c,
      0xf4,
      0x1d,
      0xb9,
      0x02,
      0x09,
      0x1c,
      0x60,
      0x1d,
      0xdd,
      0x1e,
      0x33,
      0x02,
      0x09,
      0x1c,
      0x47,
      0x1c,
      0xaa,
      0x1e,
      0x33,
      0x1c,
      0xaa,
      0x1e,
      0x71,
      0x02,
      0x77,
      0x02,
      0x09,
      0xfb,
      0x40,
      0xe5,
      0xe5,
      0xfb,
      0x40,
      0xe7,
      0x2c,
      0xfb,
      0x41,
      0x8a,
      0x9e,
      0x3d,
      0x60,
      0xfb,
      0x40,
      0xdc,
      0x11,
      0x3d,
      0x66,
      0x3d,
      0x87,
      0x3d,
      0x60,
      0x3d,
      0x83,
      0x3d,
      0x79,
      0x3d,
      0x67,
      0x02,
      0x8a,
      0x02,
      0x09,
      0x0a,
      0x2d,
      0x13,
      0xdf,
      0x14,
      0x12,
      0x13,
      0xa6,
      // Level separator.
      0x00,
      0x00,
      // Secondary weights.
      0x00,
      0x20,
      0x00,
      0x20,
      0x00,
      0x20,
      0x00,
      0x20,
      0x00,
      0x20,
      0x00,
      0x20,
      0x00,
      0x20,
      0x00,
      0x20,
      0x00,
      0x20,
      0x00,
      0x20,
      0x00,
      0x20,
      0x00,
      0x20,
      0x00,
      0x20,
      0x00,
      0x20,
      0x00,
      0x20,
      0x00,
      0x20,
      0x00,
      0x20,
      0x00,
      0x20,
      0x00,
      0x20,
      0x00,
      0x20,
      0x00,
      0x20,
      0x00,
      0x20,
      0x00,
      0x20,
      0x00,
      0x20,
      0x00,
      0x20,
      0x00,
      0x20,
      0x00,
      0x20,
      0x00,
      0x20,
      0x00,
      0x20,
      0x00,
      0x20,
      0x00,
      0x20,
      0x00,
      0x20,
      0x00,
      0x20,
      0x00,
      0x20,
      0x00,
      0x20,
      0x00,
      0x20,
      0x00,
      0x20,
      0x00,
      0x20,
      0x00,
      0x20,
      0x00,
      0x20,
      0x00,
      0x20,
      0x00,
      0x20,
      0x00,
      0x20,
      0x00,
      0x20,
      0x00,
      0x20,
      0x00,
      0x20,
      0x00,
      0x20,
      0x00,
      0x20,
      0x00,
      0x20,
      0x00,
      0x20,
      0x00,
      0x29,
      0x00,
      0x20,
      0x00,
      0x20,
      0x00,
      0x20,
      0x00,
      0x20,
      0x00,
      0x20,
      0x00,
      0x20,
      0x00,
      0x20,
      0x00,
      0x20,
      0x00,
      0x20,
      0x00,
      0x20,
      0x00,
      0x20,
      0x00,
      0x20,
      0x00,
      0x20,
      0x00,
      0x20,
      0x00,
      0x20,
      0x00,
      0x20,
      0x00,
      0x20,
      0x00,
      0x2f,
      0x00,
      0x20,
      0x00,
      0x20,
      0x00,
      0x20,
      0x01,
      0x10,
      0x00,
      0x20,
      0x00,
      0x20,
      0x00,
      0x20,
      0x00,
      0x20,
      0x00,
      0x20,
      0x00,
      0x20,
      0x00,
      0x20,
      0x00,
      0x20,
      0x00,
      0x20,
      0x00,
      0x20,
      0x00,
      0x37,
      0x00,
      0x20,
      0x00,
      0x20,
      0x00,
      0x20,
      0x00,
      0x20,
      0x00,
      0x20,
      0x00,
      0x20,
      0x00,
      0x20,
      0x00,
      0x20,
      0x00,
      0x20,
      0x00,
      0x20,
      0x00,
      0x20,
      0x00,
      0x20,
      0x00,
      0x20,
      // Level separator.
      0x00,
      0x00,
      // Tertiary weights.
      0x00,
      0x08,
      0x00,
      0x02,
      0x00,
      0x02,
      0x00,
      0x02,
      0x00,
      0x02,
      0x00,
      0x02,
      0x00,
      0x02,
      0x00,
      0x02,
      0x00,
      0x02,
      0x00,
      0x02,
      0x00,
      0x02,
      0x00,
      0x02,
      0x00,
      0x02,
      0x00,
      0x02,
      0x00,
      0x02,
      0x00,
      0x02,
      0x00,
      0x02,
      0x00,
      0x02,
      0x00,
      0x02,
      0x00,
      0x02,
      0x00,
      0x02,
      0x00,
      0x02,
      0x00,
      0x02,
      0x00,
      0x02,
      0x00,
      0x02,
      0x00,
      0x02,
      0x00,
      0x02,
      0x00,
      0x02,
      0x00,
      0x02,
      0x00,
      0x02,
      0x00,
      0x02,
      0x00,
      0x02,
      0x00,
      0x02,
      0x00,
      0x02,
      0x00,
      0x02,
      0x00,
      0x02,
      0x00,
      0x02,
      0x00,
      0x02,
      0x00,
      0x02,
      0x00,
      0x02,
      0x00,
      0x02,
      0x00,
      0x02,
      0x00,
      0x02,
      0x00,
      0x02,
      0x00,
      0x02,
      0x00,
      0x02,
      0x00,
      0x02,
      0x00,
      0x08,
      0x00,
      0x02,
      0x00,
      0x02,
      0x00,
      0x02,
      0x00,
      0x02,
      0x00,
      0x02,
      0x00,
      0x02,
      0x00,
      0x02,
      0x00,
      0x02,
      0x00,
      0x02,
      0x00,
      0x02,
      0x00,
      0x02,
      0x00,
      0x02,
      0x00,
      0x02,
      0x00,
      0x02,
      0x00,
      0x02,
      0x00,
      0x02,
      0x00,
      0x02,
      0x00,
      0x02,
      0x00,
      0x02,
      0x00,
      0x02,
      0x00,
      0x02,
      0x00,
      0x02,
      0x00,
      0x04,
      0x00,
      0x04,
      0x00,
      0x04,
      0x00,
      0x02,
      0x00,
      0x02,
      0x00,
      0x02,
      0x00,
      0x02,
      0x00,
      0x02,
      0x00,
      0x02,
      0x00,
      0x02,
      0x00,
      0x02,
      0x00,
      0x0e,
      0x00,
      0x02,
      0x00,
      0x02,
      0x00,
      0x0e,
      0x00,
      0x0e,
      0x00,
      0x0e,
      0x00,
      0x0e,
      0x00,
      0x0e,
      0x00,
      0x0e,
      0x00,
      0x02,
      0x00,
      0x02,
      0x00,
      0x02,
      0x00,
      0x02,
      0x00,
      0x02,
      0x00,
      0x02,
  };
  uchar dest[sizeof(expected)];

  size_t ret = 0;
  StartBenchmarkTiming();
  for (size_t i = 0; i < num_iterations; ++i) {
    ret = my_strnxfrm(cs, dest, sizeof(dest),
                      pointer_cast<const uchar *>(content), len);
  }
  StopBenchmarkTiming();

  // The key is expected to fill the buffer exactly; only the bytes that
  // my_strnxfrm() reported as written are compared.
  EXPECT_EQ(sizeof(expected), ret);
  expect_arrays_equal(expected, dest, ret);
  SetBytesProcessed(num_iterations * len);
}
BENCHMARK(BM_MixedUTF8MB4_AS_CS)

// Specifically benchmark Japanese text.
static void BM_JapaneseUTF8MB4(size_t num_iterations) {
  StopBenchmarkTiming();

  CHARSET_INFO *cs = init_collation("utf8mb4_0900_ai_ci");
  const char *content =
      "データの保存とアクセスを行うストレージエンジンがSQLパーサとは"
      "分離独立しており、用途に応じたストレージエンジンを選択できる"
      "「マルチストレージエンジン」方式を採用している。";
  const int len = strlen(content);

  // Just recorded from a trial run on the string above.
  static constexpr uchar expected[] = {
      0x3d, 0x6d, 0x1c, 0x0e, 0x3d, 0x6a, 0x3d, 0x73, 0xfb, 0x40, 0xcf, 0xdd,
      0xfb, 0x40, 0xdb, 0x58, 0x3d, 0x6e, 0x3d, 0x5a, 0x3d, 0x62, 0x3d, 0x68,
      0x3d, 0x67, 0x3d, 0x8a, 0xfb, 0x41, 0x88, 0x4c, 0x3d, 0x5c, 0x3d, 0x67,
      0x3d, 0x6e, 0x3d, 0x85, 0x1c, 0x0e, 0x3d, 0x66, 0x3d, 0x5e, 0x3d, 0x8b,
      0x3d, 0x66, 0x3d, 0x8b, 0x3d, 0x60, 0x1e, 0x71, 0x1e, 0x21, 0x1d, 0x77,
      0x3d, 0x74, 0x1c, 0x0e, 0x3d, 0x65, 0x3d, 0x6e, 0x3d, 0x74, 0xfb, 0x40,
      0xd2, 0x06, 0xfb, 0x41, 0x96, 0xe2, 0xfb, 0x40, 0xf2, 0xec, 0xfb, 0x40,
      0xfa, 0xcb, 0x3d, 0x66, 0x3d, 0x6d, 0x3d, 0x5f, 0x3d, 0x83, 0x02, 0x31,
      0xfb, 0x40, 0xf5, 0x28, 0xfb, 0x41, 0x90, 0x14, 0x3d, 0x70, 0xfb, 0x40,
      0xdf, 0xdc, 0x3d, 0x66, 0x3d, 0x6a, 0x3d, 0x67, 0x3d, 0x6e, 0x3d, 0x85,
      0x1c, 0x0e, 0x3d, 0x66, 0x3d, 0x5e, 0x3d, 0x8b, 0x3d, 0x66, 0x3d, 0x8b,
      0x3d, 0x8a, 0xfb, 0x41, 0x90, 0x78, 0xfb, 0x40, 0xe2, 0x9e, 0x3d, 0x6d,
      0x3d, 0x61, 0x3d, 0x84, 0x03, 0x73, 0x3d, 0x79, 0x3d, 0x84, 0x3d, 0x6b,
      0x3d, 0x67, 0x3d, 0x6e, 0x3d, 0x85, 0x1c, 0x0e, 0x3d, 0x66, 0x3d, 0x5e,
      0x3d, 0x8b, 0x3d, 0x66, 0x3d, 0x8b, 0x03, 0x74, 0xfb, 0x40, 0xe5, 0xb9,
      0xfb, 0x40, 0xdf, 0x0f, 0x3d, 0x8a, 0xfb, 0x40, 0xe3, 0xa1, 0xfb, 0x40,
      0xf5, 0x28, 0x3d, 0x66, 0x3d, 0x6d, 0x3d, 0x5b, 0x3d, 0x84, 0x02, 0x8a};
  uchar dest[sizeof(expected)];

  StartBenchmarkTiming();
  for (size_t i = 0; i < num_iterations; ++i) {
    my_strnxfrm(cs, dest, sizeof(dest),
                reinterpret_cast<const uchar *>(content), len);
  }
  StopBenchmarkTiming();

  expect_arrays_equal(expected, dest, sizeof(dest));
  SetBytesProcessed(num_iterations * strlen(content));
}
BENCHMARK(BM_JapaneseUTF8MB4)

/*
  A benchmark that illustrates the potential perils of not including the
  range [0x00,0x20) in our fast path; newlines throw us off the fast path
  and reduce speed.

  The newlines are spaced a bit randomly in order not to create a perfectly
  predictable pattern for the branch predictor (benchmark paranoia).
*/
static void BM_NewlineFilledUTF8MB4(size_t num_iterations) {
  StopBenchmarkTiming();

  CHARSET_INFO *cs = init_collation("utf8mb4_0900_ai_ci");

  const char *content =
      "This is a\n prett\ny unrealist\nic case; a\nn "
      "Eng\nlish sente\nnce where\n we'\nve added a new\nline every te\nn "
      "bytes or\n so.\n";
  const int len = strlen(content);

  // Just recorded from a trial run on the string above.
  static constexpr uchar expected[] = {
      0x1e, 0x95, 0x1d, 0x18, 0x1d, 0x32, 0x1e, 0x71, 0x02, 0x09, 0x1d, 0x32,
      0x1e, 0x71, 0x02, 0x09, 0x1c, 0x47, 0x02, 0x02, 0x02, 0x09, 0x1e, 0x0c,
      0x1e, 0x33, 0x1c, 0xaa, 0x1e, 0x95, 0x1e, 0x95, 0x02, 0x02, 0x1f, 0x0b,
      0x02, 0x09, 0x1e, 0xb5, 0x1d, 0xb9, 0x1e, 0x33, 0x1c, 0xaa, 0x1c, 0x47,
      0x1d, 0x77, 0x1d, 0x32, 0x1e, 0x71, 0x1e, 0x95, 0x02, 0x02, 0x1d, 0x32,
      0x1c, 0x7a, 0x02, 0x09, 0x1c, 0x7a, 0x1c, 0x47, 0x1e, 0x71, 0x1c, 0xaa,
      0x02, 0x34, 0x02, 0x09, 0x1c, 0x47, 0x02, 0x02, 0x1d, 0xb9, 0x02, 0x09,
      0x1c, 0xaa, 0x1d, 0xb9, 0x1c, 0xf4, 0x02, 0x02, 0x1d, 0x77, 0x1d, 0x32,
      0x1e, 0x71, 0x1d, 0x18, 0x02, 0x09, 0x1e, 0x71, 0x1c, 0xaa, 0x1d, 0xb9,
      0x1e, 0x95, 0x1c, 0xaa, 0x02, 0x02, 0x1d, 0xb9, 0x1c, 0x7a, 0x1c, 0xaa,
      0x02, 0x09, 0x1e, 0xf5, 0x1d, 0x18, 0x1c, 0xaa, 0x1e, 0x33, 0x1c, 0xaa,
      0x02, 0x02, 0x02, 0x09, 0x1e, 0xf5, 0x1c, 0xaa, 0x03, 0x05, 0x02, 0x02,
      0x1e, 0xe3, 0x1c, 0xaa, 0x02, 0x09, 0x1c, 0x47, 0x1c, 0x8f, 0x1c, 0x8f,
      0x1c, 0xaa, 0x1c, 0x8f, 0x02, 0x09, 0x1c, 0x47, 0x02, 0x09, 0x1d, 0xb9,
      0x1c, 0xaa, 0x1e, 0xf5, 0x02, 0x02, 0x1d, 0x77, 0x1d, 0x32, 0x1d, 0xb9,
      0x1c, 0xaa, 0x02, 0x09, 0x1c, 0xaa, 0x1e, 0xe3, 0x1c, 0xaa, 0x1e, 0x33,
      0x1f, 0x0b, 0x02, 0x09, 0x1e, 0x95, 0x1c, 0xaa, 0x02, 0x02, 0x1d, 0xb9,
      0x02, 0x09, 0x1c, 0x60, 0x1f, 0x0b, 0x1e, 0x95, 0x1c, 0xaa, 0x1e, 0x71,
      0x02, 0x09, 0x1d, 0xdd, 0x1e, 0x33, 0x02, 0x02, 0x02, 0x09, 0x1e, 0x71,
      0x1d, 0xdd, 0x02, 0x77, 0x02, 0x02};
  uchar dest[sizeof(expected)];

  StartBenchmarkTiming();
  for (size_t i = 0; i < num_iterations; ++i) {
    my_strnxfrm(cs, dest, sizeof(dest),
                reinterpret_cast<const uchar *>(content), len);
  }
  StopBenchmarkTiming();

  expect_arrays_equal(expected, dest, sizeof(dest));
  SetBytesProcessed(num_iterations * strlen(content));
}
BENCHMARK(BM_NewlineFilledUTF8MB4)

static void BM_HashSimpleUTF8MB4(size_t num_iterations) {
  StopBenchmarkTiming();

  CHARSET_INFO *cs = init_collation("utf8mb4_0900_ai_ci");

  const char *content =
      "This is a rather long string that contains only "
      "simple letters that are available in ASCII. This is a common special "
      "case that warrants a benchmark on its own, even if the character set "
      "and collation supports much more complicated scenarios.";
  const int len = strlen(content);

  uint64 nr1 = 1, nr2 = 4;

  StartBenchmarkTiming();
  for (size_t i = 0; i < num_iterations; ++i) {
    cs->coll->hash_sort(cs, reinterpret_cast<const uchar *>(content), len, &nr1,
                        &nr2);
  }
  StopBenchmarkTiming();

  /*
    Just to keep the compiler from optimizing away everything; this is highly
    unlikely to ever happen given hash function that's not totally broken.
    Don't test for an exact value; it will vary by platform and number
    of iterations.
  */
  EXPECT_FALSE(nr1 == 0 && nr2 == 0);
}
BENCHMARK(BM_HashSimpleUTF8MB4)

/*
  Test a non-trivial collation with contractions, to highlight
  the performance difference.
*/
static void BM_Hungarian_AS_CS(size_t num_iterations) {
  StopBenchmarkTiming();

  CHARSET_INFO *cs = init_collation("utf8mb4_hu_0900_as_cs");

  // Text snippet from Wikipedia.
  const char *content =
      "A MySQL adatbázisok adminisztrációjára a mellékelt "
      "parancssori eszközöket (mysql és mysqladmin) használhatjuk.";
  const int len = strlen(content);

  // Just recorded from a trial run on the string above.
  static constexpr uchar expected[] = {
      0x1c, 0x47, 0x02, 0x09, 0x1d, 0xaa, 0x1f, 0x0b, 0x1e, 0x71, 0x1e, 0x21,
      0x1d, 0x77, 0x02, 0x09, 0x1c, 0x47, 0x1c, 0x8f, 0x1c, 0x47, 0x1e, 0x95,
      0x1c, 0x60, 0x1c, 0x47, 0x1f, 0x21, 0x1d, 0x32, 0x1e, 0x71, 0x1d, 0xdd,
      0x1d, 0x65, 0x02, 0x09, 0x1c, 0x47, 0x1c, 0x8f, 0x1d, 0xaa, 0x1d, 0x32,
      0x1d, 0xb9, 0x1d, 0x32, 0x1e, 0x71, 0x54, 0xa5, 0x1e, 0x95, 0x1e, 0x33,
      0x1c, 0x47, 0x1c, 0x7a, 0x1d, 0x32, 0x1d, 0xdd, 0x1d, 0x4c, 0x1c, 0x47,
      0x1e, 0x33, 0x1c, 0x47, 0x02, 0x09, 0x1c, 0x47, 0x02, 0x09, 0x1d, 0xaa,
      0x1c, 0xaa, 0x1d, 0x77, 0x1d, 0x77, 0x1c, 0xaa, 0x1d, 0x65, 0x1c, 0xaa,
      0x1d, 0x77, 0x1e, 0x95, 0x02, 0x09, 0x1e, 0x0c, 0x1c, 0x47, 0x1e, 0x33,
      0x1c, 0x47, 0x1d, 0xb9, 0x1c, 0x7a, 0x54, 0xa5, 0x1e, 0x71, 0x1d, 0xdd,
      0x1e, 0x33, 0x1d, 0x32, 0x02, 0x09, 0x1c, 0xaa, 0x1e, 0x71, 0x54, 0xa5,
      0x1d, 0x65, 0x1d, 0xdd, 0x54, 0xa5, 0x1f, 0x21, 0x1d, 0xdd, 0x54, 0xa5,
      0x1d, 0x65, 0x1c, 0xaa, 0x1e, 0x95, 0x02, 0x09, 0x03, 0x17, 0x1d, 0xaa,
      0x1f, 0x0b, 0x1e, 0x71, 0x1e, 0x21, 0x1d, 0x77, 0x02, 0x09, 0x1c, 0xaa,
      0x1e, 0x71, 0x02, 0x09, 0x1d, 0xaa, 0x1f, 0x0b, 0x1e, 0x71, 0x1e, 0x21,
      0x1d, 0x77, 0x1c, 0x47, 0x1c, 0x8f, 0x1d, 0xaa, 0x1d, 0x32, 0x1d, 0xb9,
      0x03, 0x18, 0x02, 0x09, 0x1d, 0x18, 0x1c, 0x47, 0x1e, 0x71, 0x54, 0xa5,
      0x1d, 0xb9, 0x1c, 0x47, 0x1d, 0x77, 0x1d, 0x18, 0x1c, 0x47, 0x1e, 0x95,
      0x1d, 0x4c, 0x1e, 0xb5, 0x1d, 0x65, 0x02, 0x77, 0x00, 0x00, 0x00, 0x20,
      0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20,
      0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20,
      0x00, 0x20, 0x00, 0x24, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20,
      0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20,
      0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20,
      0x00, 0x24, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x24, 0x00, 0x20,
      0x00, 0x20, 0x00, 0x24, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20,
      0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20,
      0x00, 0x24, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20,
      0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20,
      0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20,
      0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20,
      0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20,
      0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x24,
      0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20,
      0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20,
      0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20,
      0x00, 0x20, 0x00, 0x24, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20,
      0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x00, 0x00, 0x08,
      0x00, 0x02, 0x00, 0x08, 0x00, 0x02, 0x00, 0x08, 0x00, 0x08, 0x00, 0x08,
      0x00, 0x02, 0x00, 0x02, 0x00, 0x02, 0x00, 0x02, 0x00, 0x02, 0x00, 0x02,
      0x00, 0x02, 0x00, 0x02, 0x00, 0x02, 0x00, 0x02, 0x00, 0x02, 0x00, 0x02,
      0x00, 0x02, 0x00, 0x02, 0x00, 0x02, 0x00, 0x02, 0x00, 0x02, 0x00, 0x02,
      0x00, 0x02, 0x00, 0x02, 0x00, 0x08, 0x00, 0x02, 0x00, 0x02, 0x00, 0x02,
      0x00, 0x02, 0x00, 0x02, 0x00, 0x02, 0x00, 0x02, 0x00, 0x02, 0x00, 0x02,
      0x00, 0x02, 0x00, 0x02, 0x00, 0x02, 0x00, 0x02, 0x00, 0x02, 0x00, 0x02,
      0x00, 0x02, 0x00, 0x02, 0x00, 0x02, 0x00, 0x02, 0x00, 0x02, 0x00, 0x02,
      0x00, 0x02, 0x00, 0x02, 0x00, 0x02, 0x00, 0x02, 0x00, 0x02, 0x00, 0x02,
      0x00, 0x02, 0x00, 0x02, 0x00, 0x02, 0x00, 0x02, 0x00, 0x02, 0x00, 0x08,
      0x00, 0x02, 0x00, 0x02, 0x00, 0x02, 0x00, 0x02, 0x00, 0x02, 0x00, 0x02,
      0x00, 0x08, 0x00, 0x02, 0x00, 0x08, 0x00, 0x02, 0x00, 0x08, 0x00, 0x02,
      0x00, 0x02, 0x00, 0x02, 0x00, 0x02, 0x00, 0x02, 0x00, 0x02, 0x00, 0x02,
      0x00, 0x02, 0x00, 0x02, 0x00, 0x02, 0x00, 0x02, 0x00, 0x02, 0x00, 0x02,
      0x00, 0x02, 0x00, 0x02, 0x00, 0x02, 0x00, 0x02, 0x00, 0x02, 0x00, 0x02,
      0x00, 0x02, 0x00, 0x02, 0x00, 0x02, 0x00, 0x02, 0x00, 0x02, 0x00, 0x02,
      0x00, 0x02, 0x00, 0x02, 0x00, 0x02, 0x00, 0x02, 0x00, 0x08, 0x00, 0x02,
      0x00, 0x02, 0x00, 0x02, 0x00, 0x02, 0x00, 0x02, 0x00, 0x02, 0x00, 0x02,
      0x00, 0x02, 0x00, 0x02, 0x00, 0x02, 0x00, 0x02};
  uchar dest[sizeof(expected)] = {0};

  size_t ret = 0;
  StartBenchmarkTiming();
  for (size_t i = 0; i < num_iterations; ++i) {
    ret = my_strnxfrm(cs, dest, sizeof(dest),
                      pointer_cast<const uchar *>(content), len);
  }
  StopBenchmarkTiming();

  EXPECT_EQ(sizeof(expected), ret);
  expect_arrays_equal(expected, dest, ret);

  SetBytesProcessed(num_iterations * strlen(content));
}
BENCHMARK(BM_Hungarian_AS_CS)

static void BM_Japanese_AS_CS(size_t num_iterations) {
  StopBenchmarkTiming();

  CHARSET_INFO *cs = init_collation("utf8mb4_ja_0900_as_cs");
  const char *content =
      "サーバー SQL モードの設定方法。この設定は、たとえば"
      "別のデータベースシステムからのコードとの互換性を保ったり、特定の状況に"
      "ついてのエラー処理を制御したりするために、SQL の構文およびセマンティクス"
      "の特定の側面を変更します。";
  const int len = strlen(content);

  // Just recorded from a trial run on the string above.
  static constexpr uchar expected[] = {
      0x1F, 0xC1, 0x1F, 0xB6, 0x1F, 0xD0, 0x1F, 0xB6, 0x02, 0x09, 0x1E, 0x71,
      0x1E, 0x21, 0x1D, 0x77, 0x02, 0x09, 0x1F, 0xD9, 0x1F, 0xBB, 0x1F, 0xCA,
      0x1F, 0xCF, 0x5A, 0xC2, 0x5C, 0x45, 0x5E, 0x8C, 0x5E, 0x8E, 0x02, 0x8A,
      0x1F, 0xC0, 0x1F, 0xCF, 0x5A, 0xC2, 0x5C, 0x45, 0x1F, 0xD0, 0x02, 0x31,
      0x1F, 0xC6, 0x1F, 0xCA, 0x1F, 0xBA, 0x1F, 0xD0, 0x5E, 0x5B, 0x1F, 0xCF,
      0x1F, 0xC9, 0x1F, 0xBA, 0x1F, 0xC6, 0x1F, 0xD3, 0x1F, 0xBA, 0x1F, 0xC3,
      0x1F, 0xC2, 0x1F, 0xC3, 0x1F, 0xC9, 0x1F, 0xD7, 0x1F, 0xBC, 0x1F, 0xDE,
      0x1F, 0xCF, 0x1F, 0xC0, 0x1F, 0xBB, 0x1F, 0xCA, 0x1F, 0xCA, 0x1F, 0xCF,
      0x57, 0xD2, 0x56, 0x34, 0x5A, 0x90, 0x1F, 0xE6, 0x5E, 0x6C, 0x1F, 0xC8,
      0x1F, 0xC6, 0x1F, 0xDF, 0x02, 0x31, 0x5C, 0xDA, 0x5C, 0x45, 0x1F, 0xCF,
      0x5A, 0x1C, 0x56, 0xEE, 0x1F, 0xCC, 0x1F, 0xC8, 0x1F, 0xB7, 0x1F, 0xC9,
      0x1F, 0xCF, 0x1F, 0xBA, 0x1F, 0xDE, 0x1F, 0xB6, 0x59, 0xB1, 0x5F, 0xA6,
      0x1F, 0xE6, 0x5A, 0x8C, 0x57, 0xD9, 0x1F, 0xC2, 0x1F, 0xC6, 0x1F, 0xDF,
      0x1F, 0xC3, 0x1F, 0xE0, 0x1F, 0xC6, 0x1F, 0xD8, 0x1F, 0xCC, 0x02, 0x31,
      0x1E, 0x71, 0x1E, 0x21, 0x1D, 0x77, 0x02, 0x09, 0x1F, 0xCF, 0x58, 0x0E,
      0x5E, 0x47, 0x1F, 0xBB, 0x1F, 0xDD, 0x1F, 0xD1, 0x1F, 0xC4, 0x1F, 0xD5,
      0x1F, 0xE7, 0x1F, 0xC9, 0x1F, 0xB7, 0x1F, 0xBE, 0x1F, 0xC3, 0x1F, 0xCF,
      0x5C, 0xDA, 0x5C, 0x45, 0x1F, 0xCF, 0x5B, 0x45, 0x5F, 0x17, 0x1F, 0xE6,
      0x5E, 0x60, 0x58, 0x0A, 0x1F, 0xC2, 0x1F, 0xD5, 0x1F, 0xC3, 0x02, 0x8A,
      0x00, 0x00, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x37, 0x00, 0x20,
      0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20,
      0x00, 0x20, 0x00, 0x20, 0x00, 0x37, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20,
      0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20,
      0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20,
      0x00, 0x20, 0x00, 0x37, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x37,
      0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x37, 0x00, 0x20, 0x00, 0x20,
      0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20,
      0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x37, 0x00, 0x20,
      0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20,
      0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20,
      0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20,
      0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20,
      0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20,
      0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20,
      0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20,
      0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x37,
      0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20,
      0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20,
      0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20,
      0x00, 0x20, 0x00, 0x20, 0x00, 0x00, 0x00, 0x0E, 0x00, 0x0C, 0x00, 0x21,
      0x00, 0x0E, 0x00, 0x02, 0x00, 0x0C, 0x00, 0x21, 0x00, 0x02, 0x00, 0x08,
      0x00, 0x08, 0x00, 0x08, 0x00, 0x02, 0x00, 0x0E, 0x00, 0x0C, 0x00, 0x21,
      0x00, 0x0E, 0x00, 0x02, 0x00, 0x0E, 0x00, 0x02, 0x00, 0x02, 0x00, 0x02,
      0x00, 0x02, 0x00, 0x02, 0x00, 0x0E, 0x00, 0x0E, 0x00, 0x02, 0x00, 0x02,
      0x00, 0x0E, 0x00, 0x02, 0x00, 0x0E, 0x00, 0x0E, 0x00, 0x0E, 0x00, 0x0E,
      0x00, 0x02, 0x00, 0x02, 0x00, 0x0E, 0x00, 0x0E, 0x00, 0x02, 0x00, 0x0C,
      0x00, 0x21, 0x00, 0x0E, 0x00, 0x0E, 0x00, 0x02, 0x00, 0x0C, 0x00, 0x21,
      0x00, 0x0E, 0x00, 0x0E, 0x00, 0x0E, 0x00, 0x0E, 0x00, 0x0E, 0x00, 0x0E,
      0x00, 0x0E, 0x00, 0x0E, 0x00, 0x0E, 0x00, 0x0C, 0x00, 0x21, 0x00, 0x0E,
      0x00, 0x02, 0x00, 0x0E, 0x00, 0x0E, 0x00, 0x02, 0x00, 0x02, 0x00, 0x02,
      0x00, 0x0E, 0x00, 0x02, 0x00, 0x0D, 0x00, 0x0E, 0x00, 0x0E, 0x00, 0x02,
      0x00, 0x02, 0x00, 0x02, 0x00, 0x0E, 0x00, 0x02, 0x00, 0x02, 0x00, 0x0E,
      0x00, 0x0E, 0x00, 0x0E, 0x00, 0x0E, 0x00, 0x0E, 0x00, 0x0E, 0x00, 0x0E,
      0x00, 0x0C, 0x00, 0x21, 0x00, 0x02, 0x00, 0x02, 0x00, 0x0E, 0x00, 0x02,
      0x00, 0x02, 0x00, 0x0E, 0x00, 0x0E, 0x00, 0x0E, 0x00, 0x0E, 0x00, 0x0E,
      0x00, 0x0E, 0x00, 0x0E, 0x00, 0x0E, 0x00, 0x02, 0x00, 0x08, 0x00, 0x08,
      0x00, 0x08, 0x00, 0x02, 0x00, 0x0E, 0x00, 0x02, 0x00, 0x02, 0x00, 0x0E,
      0x00, 0x0E, 0x00, 0x0E, 0x00, 0x02, 0x00, 0x0E, 0x00, 0x0E, 0x00, 0x0E,
      0x00, 0x0E, 0x00, 0x0D, 0x00, 0x0E, 0x00, 0x0E, 0x00, 0x0E, 0x00, 0x02,
      0x00, 0x02, 0x00, 0x0E, 0x00, 0x02, 0x00, 0x02, 0x00, 0x0E, 0x00, 0x02,
      0x00, 0x02, 0x00, 0x0E, 0x00, 0x0E, 0x00, 0x0E, 0x00, 0x02};
  uchar dest[sizeof(expected)];

  StartBenchmarkTiming();
  for (size_t i = 0; i < num_iterations; ++i) {
    my_strnxfrm(cs, dest, sizeof(dest),
                reinterpret_cast<const uchar *>(content), len);
  }
  StopBenchmarkTiming();

  expect_arrays_equal(expected, dest, sizeof(dest));
  SetBytesProcessed(num_iterations * strlen(content));
}
BENCHMARK(BM_Japanese_AS_CS)

static void BM_Japanese_AS_CS_KS(size_t num_iterations) {
  StopBenchmarkTiming();

  CHARSET_INFO *cs = init_collation("utf8mb4_ja_0900_as_cs_ks");
  const char *content =
      "サーバー SQL モードの設定方法。この設定は、たとえば"
      "別のデータベースシステムからのコードとの互換性を保ったり、特定の状況に"
      "ついてのエラー処理を制御したりするために、SQL の構文およびセマンティクス"
      "の特定の側面を変更します。";
  const int len = strlen(content);

  // Just recorded from a trial run on the string above.
  static constexpr uchar expected[] = {
      0x1F, 0xC1, 0x1F, 0xB6, 0x1F, 0xD0, 0x1F, 0xB6, 0x02, 0x09, 0x1E, 0x71,
      0x1E, 0x21, 0x1D, 0x77, 0x02, 0x09, 0x1F, 0xD9, 0x1F, 0xBB, 0x1F, 0xCA,
      0x1F, 0xCF, 0x5A, 0xC2, 0x5C, 0x45, 0x5E, 0x8C, 0x5E, 0x8E, 0x02, 0x8A,
      0x1F, 0xC0, 0x1F, 0xCF, 0x5A, 0xC2, 0x5C, 0x45, 0x1F, 0xD0, 0x02, 0x31,
      0x1F, 0xC6, 0x1F, 0xCA, 0x1F, 0xBA, 0x1F, 0xD0, 0x5E, 0x5B, 0x1F, 0xCF,
      0x1F, 0xC9, 0x1F, 0xBA, 0x1F, 0xC6, 0x1F, 0xD3, 0x1F, 0xBA, 0x1F, 0xC3,
      0x1F, 0xC2, 0x1F, 0xC3, 0x1F, 0xC9, 0x1F, 0xD7, 0x1F, 0xBC, 0x1F, 0xDE,
      0x1F, 0xCF, 0x1F, 0xC0, 0x1F, 0xBB, 0x1F, 0xCA, 0x1F, 0xCA, 0x1F, 0xCF,
      0x57, 0xD2, 0x56, 0x34, 0x5A, 0x90, 0x1F, 0xE6, 0x5E, 0x6C, 0x1F, 0xC8,
      0x1F, 0xC6, 0x1F, 0xDF, 0x02, 0x31, 0x5C, 0xDA, 0x5C, 0x45, 0x1F, 0xCF,
      0x5A, 0x1C, 0x56, 0xEE, 0x1F, 0xCC, 0x1F, 0xC8, 0x1F, 0xB7, 0x1F, 0xC9,
      0x1F, 0xCF, 0x1F, 0xBA, 0x1F, 0xDE, 0x1F, 0xB6, 0x59, 0xB1, 0x5F, 0xA6,
      0x1F, 0xE6, 0x5A, 0x8C, 0x57, 0xD9, 0x1F, 0xC2, 0x1F, 0xC6, 0x1F, 0xDF,
      0x1F, 0xC3, 0x1F, 0xE0, 0x1F, 0xC6, 0x1F, 0xD8, 0x1F, 0xCC, 0x02, 0x31,
      0x1E, 0x71, 0x1E, 0x21, 0x1D, 0x77, 0x02, 0x09, 0x1F, 0xCF, 0x58, 0x0E,
      0x5E, 0x47, 0x1F, 0xBB, 0x1F, 0xDD, 0x1F, 0xD1, 0x1F, 0xC4, 0x1F, 0xD5,
      0x1F, 0xE7, 0x1F, 0xC9, 0x1F, 0xB7, 0x1F, 0xBE, 0x1F, 0xC3, 0x1F, 0xCF,
      0x5C, 0xDA, 0x5C, 0x45, 0x1F, 0xCF, 0x5B, 0x45, 0x5F, 0x17, 0x1F, 0xE6,
      0x5E, 0x60, 0x58, 0x0A, 0x1F, 0xC2, 0x1F, 0xD5, 0x1F, 0xC3, 0x02, 0x8A,
      0x00, 0x00, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x37, 0x00, 0x20,
      0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20,
      0x00, 0x20, 0x00, 0x20, 0x00, 0x37, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20,
      0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20,
      0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20,
      0x00, 0x20, 0x00, 0x37, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x37,
      0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x37, 0x00, 0x20, 0x00, 0x20,
      0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20,
      0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x37, 0x00, 0x20,
      0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20,
      0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20,
      0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20,
      0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20,
      0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20,
      0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20,
      0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20,
      0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x37,
      0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20,
      0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20,
      0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20,
      0x00, 0x20, 0x00, 0x20, 0x00, 0x00, 0x00, 0x0E, 0x00, 0x0C, 0x00, 0x21,
      0x00, 0x0E, 0x00, 0x02, 0x00, 0x0C, 0x00, 0x21, 0x00, 0x02, 0x00, 0x08,
      0x00, 0x08, 0x00, 0x08, 0x00, 0x02, 0x00, 0x0E, 0x00, 0x0C, 0x00, 0x21,
      0x00, 0x0E, 0x00, 0x02, 0x00, 0x0E, 0x00, 0x02, 0x00, 0x02, 0x00, 0x02,
      0x00, 0x02, 0x00, 0x02, 0x00, 0x0E, 0x00, 0x0E, 0x00, 0x02, 0x00, 0x02,
      0x00, 0x0E, 0x00, 0x02, 0x00, 0x0E, 0x00, 0x0E, 0x00, 0x0E, 0x00, 0x0E,
      0x00, 0x02, 0x00, 0x02, 0x00, 0x0E, 0x00, 0x0E, 0x00, 0x02, 0x00, 0x0C,
      0x00, 0x21, 0x00, 0x0E, 0x00, 0x0E, 0x00, 0x02, 0x00, 0x0C, 0x00, 0x21,
      0x00, 0x0E, 0x00, 0x0E, 0x00, 0x0E, 0x00, 0x0E, 0x00, 0x0E, 0x00, 0x0E,
      0x00, 0x0E, 0x00, 0x0E, 0x00, 0x0E, 0x00, 0x0C, 0x00, 0x21, 0x00, 0x0E,
      0x00, 0x02, 0x00, 0x0E, 0x00, 0x0E, 0x00, 0x02, 0x00, 0x02, 0x00, 0x02,
      0x00, 0x0E, 0x00, 0x02, 0x00, 0x0D, 0x00, 0x0E, 0x00, 0x0E, 0x00, 0x02,
      0x00, 0x02, 0x00, 0x02, 0x00, 0x0E, 0x00, 0x02, 0x00, 0x02, 0x00, 0x0E,
      0x00, 0x0E, 0x00, 0x0E, 0x00, 0x0E, 0x00, 0x0E, 0x00, 0x0E, 0x00, 0x0E,
      0x00, 0x0C, 0x00, 0x21, 0x00, 0x02, 0x00, 0x02, 0x00, 0x0E, 0x00, 0x02,
      0x00, 0x02, 0x00, 0x0E, 0x00, 0x0E, 0x00, 0x0E, 0x00, 0x0E, 0x00, 0x0E,
      0x00, 0x0E, 0x00, 0x0E, 0x00, 0x0E, 0x00, 0x02, 0x00, 0x08, 0x00, 0x08,
      0x00, 0x08, 0x00, 0x02, 0x00, 0x0E, 0x00, 0x02, 0x00, 0x02, 0x00, 0x0E,
      0x00, 0x0E, 0x00, 0x0E, 0x00, 0x02, 0x00, 0x0E, 0x00, 0x0E, 0x00, 0x0E,
      0x00, 0x0E, 0x00, 0x0D, 0x00, 0x0E, 0x00, 0x0E, 0x00, 0x0E, 0x00, 0x02,
      0x00, 0x02, 0x00, 0x0E, 0x00, 0x02, 0x00, 0x02, 0x00, 0x0E, 0x00, 0x02,
      0x00, 0x02, 0x00, 0x0E, 0x00, 0x0E, 0x00, 0x0E, 0x00, 0x02, 0x00, 0x00,
      0x00, 0x08, 0x00, 0x08, 0x00, 0x08, 0x00, 0x08, 0x00, 0x08, 0x00, 0x08,
      0x00, 0x08, 0x00, 0x02, 0x00, 0x02, 0x00, 0x02, 0x00, 0x02, 0x00, 0x02,
      0x00, 0x02, 0x00, 0x02, 0x00, 0x02, 0x00, 0x02, 0x00, 0x08, 0x00, 0x08,
      0x00, 0x08, 0x00, 0x08, 0x00, 0x08, 0x00, 0x08, 0x00, 0x08, 0x00, 0x08,
      0x00, 0x08, 0x00, 0x08, 0x00, 0x02, 0x00, 0x02, 0x00, 0x02, 0x00, 0x08,
      0x00, 0x08, 0x00, 0x08, 0x00, 0x02, 0x00, 0x02, 0x00, 0x02, 0x00, 0x02,
      0x00, 0x02, 0x00, 0x02, 0x00, 0x02, 0x00, 0x02, 0x00, 0x02, 0x00, 0x02,
      0x00, 0x02, 0x00, 0x02, 0x00, 0x08, 0x00, 0x08, 0x00, 0x08, 0x00, 0x02,
      0x00, 0x02, 0x00, 0x02, 0x00, 0x02, 0x00, 0x02, 0x00, 0x02, 0x00, 0x02,
      0x00, 0x02, 0x00, 0x02, 0x00, 0x02, 0x00, 0x02, 0x00, 0x02, 0x00, 0x02,
      0x00, 0x08, 0x00, 0x08, 0x00, 0x08, 0x00, 0x08, 0x00, 0x08, 0x00, 0x08,
      0x00, 0x08, 0x00, 0x02, 0x00, 0x02, 0x00, 0x02, 0x00, 0x02, 0x00, 0x02,
      0x00, 0x02};
  uchar dest[sizeof(expected)];

  StartBenchmarkTiming();
  for (size_t i = 0; i < num_iterations; ++i) {
    my_strnxfrm(cs, dest, sizeof(dest),
                reinterpret_cast<const uchar *>(content), len);
  }
  StopBenchmarkTiming();

  expect_arrays_equal(expected, dest, sizeof(dest));
  SetBytesProcessed(num_iterations * strlen(content));
}
BENCHMARK(BM_Japanese_AS_CS_KS)

// Checks my_strnxfrm() under utf8mb4_zh_0900_as_cs against a hand-annotated
// reference key, for every even destination length below the full key size.
TEST(StrXfrmTest, ChineseUTF8MB4) {
  CHARSET_INFO *cs = init_collation("utf8mb4_zh_0900_as_cs");

  const char *src =
      "\xE9\x98\xBF\xE5\x92\x97"  // The first and last Han character in zh.xml
      "\xF0\xAC\xBA\xA1"          // The last Han character
      "\xC4\x81\x61\x62\xC5\xAB\x75\x55\xC7\x96\x5A"  // Some latin characters
                                                      // are used as Bopomofo.
      "\xF0\x94\x99\x86"  // The last character that has explicit weight
                          // in the DUCET.

      /* Non-Han characters that have implicit weight. */
      "\xF0\x97\x86\xA0\xF0\xAC\xBA\xA2\xF0\xAE\xAF\xA0\xF0\xB3\x8C\xB3";
  const uchar *input = pointer_cast<const uchar *>(src);

  static const unsigned char full_answer_with_pad[116] = {
      // level 1
      0x1C, 0x47, 0xBD, 0xBE,  // The first and last Han character in zh.xml
      0xBD, 0xC3, 0xCE, 0xA1,  // The last Han character
      /* Latin characters. Some are used as Bopomofo. */
      0xBD, 0xC4, 0xBD, 0xC4, 0xBD, 0xDD, 0xC0, 0x32, 0xC0, 0x32, 0xC0, 0x32,
      0xC0, 0x32, 0xC0, 0x9E,

      0xF6, 0x20,  // The last character that has explicit weight in the DUCET.
      /* Non-Han characters that have implicit weight. */
      0xF6, 0x21, 0x81, 0xA0, 0xF6, 0x27, 0xCE, 0xA2, 0xF6, 0x27, 0xEB, 0xE0,
      0xF6, 0x28, 0xB3, 0x33,

      // level separator.
      0x00, 0x00,

      // level 2
      0x00, 0x20, 0x00, 0x20,  // The first and last Han character in zh.xml
      0x00, 0x20,              // The last Han character

      /* Latin characters. Some are used as Bopomofo. */
      0x00, 0x1F, 0x01, 0x16, 0x00, 0x20, 0x00, 0x20, 0x00, 0x1F, 0x01, 0x16,
      0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x01, 0x16, 0x00, 0x20,

      0x00, 0x20,  // The last character that has explicit weight in the DUCET.
      /* Non-Han characters that have implicit weight. */
      0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20,
      // level separator.
      0x00, 0x00,

      // level 3
      0x00, 0x02, 0x00, 0x02,  // The first and last Han character in zh.xml
      0x00, 0x02,              // The last Han character
      /* Latin characters. Some are used as Bopomofo. */
      0x00, 0x02, 0x00, 0x02, 0x00, 0x02, 0x00, 0x02, 0x00, 0x02, 0x00, 0x08,
      0x00, 0x08, 0x00, 0x08,

      0x00, 0x02,  // The last character that has explicit weight in the DUCET.
      /* Non-Han characters that have implicit weight. */
      0x00, 0x02, 0x00, 0x02, 0x00, 0x02, 0x00, 0x02};

  unsigned char key[sizeof(full_answer_with_pad)];
  size_t capacity = 0;
  while (capacity < sizeof(key)) {
    // Fill with 0xff before each run so the buffer starts from a known state.
    memset(key, 0xff, sizeof(key));
    my_strnxfrm(cs, key, capacity, input, strlen(src));
    // Only the first `capacity` bytes are compared against the reference.
    expect_arrays_equal(full_answer_with_pad, key, capacity);
    // Weights in the reference are two bytes each, so advance in steps of 2.
    capacity += 2;
  }
}

/*
  Microbenchmark: run my_strnxfrm() with the utf8mb4_zh_0900_as_cs collation
  over a block of Chinese text.

  All setup (collation lookup, input text, golden data) happens while the
  benchmark timer is stopped; only the transformation loop is timed. After
  the loop, the output left in the buffer is compared against a previously
  recorded weight string.

  @param num_iterations  Number of times the transformation is executed.
*/
static void BM_Chinese_AS_CS(size_t num_iterations) {
  // Keep the setup below out of the measurement.
  StopBenchmarkTiming();

  CHARSET_INFO *cs = init_collation("utf8mb4_zh_0900_as_cs");
  const char *content =
      "春江潮水连海平，海上明月共潮生。"
      "滟滟随波千万里，何处春江无月明！"
      "江流宛转绕芳甸，月照花林皆似霰；"
      "空里流霜不觉飞，汀上白沙看不见。"
      "江天一色无纤尘，皎皎空中孤月轮。"
      "江畔何人初见月？江月何年初照人？"
      "人生代代无穷已，江月年年只相似。"
      "不知江月待何人，但见长江送流水。"
      "白云一片去悠悠，青枫浦上不胜愁。"
      "谁家今夜扁舟子？何处相思明月楼？";
  const int len = strlen(content);

  // Just recorded from a trial run on the string above.
  static constexpr uchar expected[] = {
      0x2C, 0xD0, 0x4F, 0xF1, 0x28, 0x08, 0x87, 0xE8, 0x60, 0x4C, 0x42, 0xEF,
      0x75, 0x93, 0x02, 0x22, 0x42, 0xEF, 0x83, 0x8A, 0x6C, 0x4F, 0xAF, 0x96,
      0x3F, 0x58, 0x28, 0x08, 0x84, 0xCF, 0x02, 0x8A, 0xA3, 0xA4, 0xA3, 0xA4,
      0x8A, 0x5F, 0x23, 0x71, 0x78, 0xA8, 0x93, 0x1A, 0x5E, 0xD9, 0x02, 0x22,
      0x44, 0xAC, 0x2B, 0xD5, 0x2C, 0xD0, 0x4F, 0xF1, 0x96, 0x31, 0xAF, 0x96,
      0x6C, 0x4F, 0x02, 0x60, 0x4F, 0xF1, 0x63, 0x7B, 0x92, 0xDD, 0xBA, 0x2E,
      0x7F, 0x07, 0x39, 0x15, 0x32, 0xB2, 0x02, 0x22, 0xAF, 0x96, 0xB4, 0x41,
      0x47, 0xD7, 0x62, 0x27, 0x51, 0x4C, 0x85, 0xE9, 0x81, 0x86, 0x02, 0x34,
      0x59, 0x09, 0x5E, 0xD9, 0x63, 0x7B, 0x87, 0xBA, 0x24, 0x78, 0x56, 0x5A,
      0x39, 0x48, 0x02, 0x22, 0x8F, 0x74, 0x83, 0x8A, 0x1E, 0x4D, 0x82, 0x46,
      0x57, 0xD9, 0x24, 0x78, 0x4F, 0x79, 0x02, 0x8A, 0x4F, 0xF1, 0x8E, 0x8A,
      0xA6, 0x3E, 0x81, 0xEE, 0x96, 0x31, 0x99, 0x9E, 0x28, 0x97, 0x02, 0x22,
      0x50, 0xC2, 0x50, 0xC2, 0x59, 0x09, 0xB8, 0x20, 0x3F, 0xCC, 0xAF, 0x96,
      0x66, 0xC9, 0x02, 0x8A, 0x4F, 0xF1, 0x72, 0xB6, 0x44, 0xAC, 0x7F, 0x11,
      0x2B, 0x7B, 0x4F, 0x79, 0xAF, 0x96, 0x02, 0x66, 0x4F, 0xF1, 0xAF, 0x96,
      0x44, 0xAC, 0x6F, 0xD5, 0x2B, 0x7B, 0xB4, 0x41, 0x7F, 0x11, 0x02, 0x66,
      0x7F, 0x11, 0x84, 0xCF, 0x2F, 0xE2, 0x2F, 0xE2, 0x96, 0x31, 0x7B, 0xE1,
      0xA7, 0x41, 0x02, 0x22, 0x4F, 0xF1, 0xAF, 0x96, 0x6F, 0xD5, 0x6F, 0xD5,
      0xB6, 0xC3, 0x9B, 0x15, 0x85, 0xE9, 0x02, 0x8A, 0x24, 0x78, 0xB6, 0x2E,
      0x4F, 0xF1, 0xAF, 0x96, 0x2F, 0xF4, 0x44, 0xAC, 0x7F, 0x11, 0x02, 0x22,
      0x30, 0x86, 0x4F, 0x79, 0xB3, 0xDD, 0x4F, 0xF1, 0x89, 0x2A, 0x63, 0x7B,
      0x87, 0xE8, 0x02, 0x8A, 0x1E, 0x4D, 0xB0, 0x1B, 0xA6, 0x3E, 0x75, 0x00,
      0x7D, 0x93, 0xAB, 0xAF, 0xAB, 0xAF, 0x02, 0x22, 0x7B, 0x7D, 0x3A, 0x63,
      0x76, 0xA2, 0x83, 0x8A, 0x24, 0x78, 0x85, 0x16, 0x2B, 0x2D, 0x02, 0x8A,
      0x84, 0x30, 0x4D, 0xF3, 0x52, 0x63, 0xA5, 0xC7, 0x21, 0xE0, 0xB8, 0x87,
      0xBC, 0x16, 0x02, 0x66, 0x44, 0xAC, 0x2B, 0xD5, 0x9B, 0x15, 0x88, 0x52,
      0x6C, 0x4F, 0xAF, 0x96, 0x64, 0xA1, 0x02, 0x66, 0x00, 0x00, 0x00, 0x20,
      0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20,
      0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20,
      0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20,
      0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20,
      0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20,
      0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20,
      0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20,
      0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20,
      0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20,
      0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20,
      0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20,
      0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20,
      0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20,
      0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20,
      0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20,
      0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20,
      0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20,
      0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20,
      0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20,
      0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20,
      0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20,
      0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20,
      0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20,
      0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20,
      0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20,
      0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20,
      0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x00, 0x00, 0x02, 0x00, 0x02,
      0x00, 0x02, 0x00, 0x02, 0x00, 0x02, 0x00, 0x02, 0x00, 0x02, 0x00, 0x03,
      0x00, 0x02, 0x00, 0x02, 0x00, 0x02, 0x00, 0x02, 0x00, 0x02, 0x00, 0x02,
      0x00, 0x02, 0x00, 0x02, 0x00, 0x02, 0x00, 0x02, 0x00, 0x02, 0x00, 0x02,
      0x00, 0x02, 0x00, 0x02, 0x00, 0x02, 0x00, 0x03, 0x00, 0x02, 0x00, 0x02,
      0x00, 0x02, 0x00, 0x02, 0x00, 0x02, 0x00, 0x02, 0x00, 0x02, 0x00, 0x03,
      0x00, 0x02, 0x00, 0x02, 0x00, 0x02, 0x00, 0x02, 0x00, 0x02, 0x00, 0x02,
      0x00, 0x02, 0x00, 0x03, 0x00, 0x02, 0x00, 0x02, 0x00, 0x02, 0x00, 0x02,
      0x00, 0x02, 0x00, 0x02, 0x00, 0x02, 0x00, 0x03, 0x00, 0x02, 0x00, 0x02,
      0x00, 0x02, 0x00, 0x02, 0x00, 0x02, 0x00, 0x02, 0x00, 0x02, 0x00, 0x03,
      0x00, 0x02, 0x00, 0x02, 0x00, 0x02, 0x00, 0x02, 0x00, 0x02, 0x00, 0x02,
      0x00, 0x02, 0x00, 0x02, 0x00, 0x02, 0x00, 0x02, 0x00, 0x02, 0x00, 0x02,
      0x00, 0x02, 0x00, 0x02, 0x00, 0x02, 0x00, 0x03, 0x00, 0x02, 0x00, 0x02,
      0x00, 0x02, 0x00, 0x02, 0x00, 0x02, 0x00, 0x02, 0x00, 0x02, 0x00, 0x02,
      0x00, 0x02, 0x00, 0x02, 0x00, 0x02, 0x00, 0x02, 0x00, 0x02, 0x00, 0x02,
      0x00, 0x02, 0x00, 0x03, 0x00, 0x02, 0x00, 0x02, 0x00, 0x02, 0x00, 0x02,
      0x00, 0x02, 0x00, 0x02, 0x00, 0x02, 0x00, 0x03, 0x00, 0x02, 0x00, 0x02,
      0x00, 0x02, 0x00, 0x02, 0x00, 0x02, 0x00, 0x02, 0x00, 0x02, 0x00, 0x03,
      0x00, 0x02, 0x00, 0x02, 0x00, 0x02, 0x00, 0x02, 0x00, 0x02, 0x00, 0x02,
      0x00, 0x02, 0x00, 0x02, 0x00, 0x02, 0x00, 0x02, 0x00, 0x02, 0x00, 0x02,
      0x00, 0x02, 0x00, 0x02, 0x00, 0x02, 0x00, 0x03, 0x00, 0x02, 0x00, 0x02,
      0x00, 0x02, 0x00, 0x02, 0x00, 0x02, 0x00, 0x02, 0x00, 0x02, 0x00, 0x02,
      0x00, 0x02, 0x00, 0x02, 0x00, 0x02, 0x00, 0x02, 0x00, 0x02, 0x00, 0x02,
      0x00, 0x02, 0x00, 0x03, 0x00, 0x02, 0x00, 0x02, 0x00, 0x02, 0x00, 0x02,
      0x00, 0x02, 0x00, 0x02, 0x00, 0x02, 0x00, 0x02, 0x00, 0x02, 0x00, 0x02,
      0x00, 0x02, 0x00, 0x02, 0x00, 0x02, 0x00, 0x02, 0x00, 0x02, 0x00, 0x03,
      0x00, 0x02, 0x00, 0x02, 0x00, 0x02, 0x00, 0x02, 0x00, 0x02, 0x00, 0x02,
      0x00, 0x02, 0x00, 0x03};
  // The output buffer is exactly as large as the recorded weight string.
  uchar dest[sizeof(expected)];

  // Timed region: nothing but the repeated transformation.
  StartBenchmarkTiming();
  for (size_t i = 0; i < num_iterations; ++i) {
    my_strnxfrm(cs, dest, sizeof(dest),
                reinterpret_cast<const uchar *>(content), len);
  }
  StopBenchmarkTiming();

  // Verify whatever the last executed iteration left in the buffer.
  expect_arrays_equal(expected, dest, sizeof(dest));
  // Report the total number of input bytes fed through the loop.
  SetBytesProcessed(num_iterations * strlen(content));
}
BENCHMARK(BM_Chinese_AS_CS)

/*
  Microbenchmark: run my_strnxfrm() with the utf8mb4_bin collation over a
  string mixing ASCII, Norwegian letters, Japanese text and emoji.

  Setup is done with the benchmark timer stopped; only the transformation
  loop is timed. The buffer contents after the loop are compared against a
  previously recorded weight string, which holds three bytes per input
  character (e.g. 0x00, 0x00, 0x50 for the leading 'P').

  @param num_iterations  Number of times the transformation is executed.
*/
static void BM_UTF8MB4_bin(size_t num_iterations) {
  // Keep the setup below out of the measurement.
  StopBenchmarkTiming();

  CHARSET_INFO *cs = init_collation("utf8mb4_bin");
  const char *content =
      "Premature optimization is the root of all evil. "
      "Våre norske tegn bør æres. 日本語が少しわかります。 "
      "✌️🐶👩🏽";
  const int len = strlen(content);

  // Just recorded from a trial run on the string above.
  static constexpr uchar expected[] = {
      0x00, 0x00, 0x50, 0x00, 0x00, 0x72, 0x00, 0x00, 0x65, 0x00, 0x00, 0x6D,
      0x00, 0x00, 0x61, 0x00, 0x00, 0x74, 0x00, 0x00, 0x75, 0x00, 0x00, 0x72,
      0x00, 0x00, 0x65, 0x00, 0x00, 0x20, 0x00, 0x00, 0x6F, 0x00, 0x00, 0x70,
      0x00, 0x00, 0x74, 0x00, 0x00, 0x69, 0x00, 0x00, 0x6D, 0x00, 0x00, 0x69,
      0x00, 0x00, 0x7A, 0x00, 0x00, 0x61, 0x00, 0x00, 0x74, 0x00, 0x00, 0x69,
      0x00, 0x00, 0x6F, 0x00, 0x00, 0x6E, 0x00, 0x00, 0x20, 0x00, 0x00, 0x69,
      0x00, 0x00, 0x73, 0x00, 0x00, 0x20, 0x00, 0x00, 0x74, 0x00, 0x00, 0x68,
      0x00, 0x00, 0x65, 0x00, 0x00, 0x20, 0x00, 0x00, 0x72, 0x00, 0x00, 0x6F,
      0x00, 0x00, 0x6F, 0x00, 0x00, 0x74, 0x00, 0x00, 0x20, 0x00, 0x00, 0x6F,
      0x00, 0x00, 0x66, 0x00, 0x00, 0x20, 0x00, 0x00, 0x61, 0x00, 0x00, 0x6C,
      0x00, 0x00, 0x6C, 0x00, 0x00, 0x20, 0x00, 0x00, 0x65, 0x00, 0x00, 0x76,
      0x00, 0x00, 0x69, 0x00, 0x00, 0x6C, 0x00, 0x00, 0x2E, 0x00, 0x00, 0x20,
      0x00, 0x00, 0x56, 0x00, 0x00, 0xE5, 0x00, 0x00, 0x72, 0x00, 0x00, 0x65,
      0x00, 0x00, 0x20, 0x00, 0x00, 0x6E, 0x00, 0x00, 0x6F, 0x00, 0x00, 0x72,
      0x00, 0x00, 0x73, 0x00, 0x00, 0x6B, 0x00, 0x00, 0x65, 0x00, 0x00, 0x20,
      0x00, 0x00, 0x74, 0x00, 0x00, 0x65, 0x00, 0x00, 0x67, 0x00, 0x00, 0x6E,
      0x00, 0x00, 0x20, 0x00, 0x00, 0x62, 0x00, 0x00, 0xF8, 0x00, 0x00, 0x72,
      0x00, 0x00, 0x20, 0x00, 0x00, 0xE6, 0x00, 0x00, 0x72, 0x00, 0x00, 0x65,
      0x00, 0x00, 0x73, 0x00, 0x00, 0x2E, 0x00, 0x00, 0x20, 0x00, 0x65, 0xE5,
      0x00, 0x67, 0x2C, 0x00, 0x8A, 0x9E, 0x00, 0x30, 0x4C, 0x00, 0x5C, 0x11,
      0x00, 0x30, 0x57, 0x00, 0x30, 0x8F, 0x00, 0x30, 0x4B, 0x00, 0x30, 0x8A,
      0x00, 0x30, 0x7E, 0x00, 0x30, 0x59, 0x00, 0x30, 0x02, 0x00, 0x00, 0x20,
      0x00, 0x27, 0x0C, 0x00, 0xFE, 0x0F, 0x01, 0xF4, 0x36, 0x01, 0xF4, 0x69,
      0x01, 0xF3, 0xFD};
  // The output buffer is exactly as large as the recorded weight string.
  uchar dest[sizeof(expected)];

  // Timed region: nothing but the repeated transformation.
  StartBenchmarkTiming();
  for (size_t i = 0; i < num_iterations; ++i) {
    my_strnxfrm(cs, dest, sizeof(dest),
                reinterpret_cast<const uchar *>(content), len);
  }
  StopBenchmarkTiming();

  // Verify whatever the last executed iteration left in the buffer.
  expect_arrays_equal(expected, dest, sizeof(dest));
  // Report the total number of input bytes fed through the loop.
  SetBytesProcessed(num_iterations * strlen(content));
}
BENCHMARK(BM_UTF8MB4_bin)

static void BM_UTF8MB4_0900_bin(size_t num_iterations) {
  StopBenchmarkTiming();

  CHARSET_INFO *cs = init_collation("utf8mb4_0900_bin");
  const char *content =
      "Premature optimization is the root of all evil. "
      "Våre norske tegn bør æres. 日本語が少しわかります。 "
      "✌️🐶👩🏽";
  const int len = strlen(content);

  uchar *dest = new uchar[len];

  StartBenchmarkTiming();
  for (size_t i = 0; i < num_iterations; ++i) {
    my_strnxfrm(cs, dest, len, reinterpret_cast<const uchar *>(content), len);
  }
  StopBenchmarkTiming();

  /*
    utf8mb4_0900_bin_nopad gives the weight that has same bytes and length as
    source string.
   */
  expect_arrays_equal((const uchar *)content, dest, len);
  delete[] dest;
  SetBytesProcessed(num_iterations * len);
}
BENCHMARK(BM_UTF8MB4_0900_bin)

/*
  The classic MySQL latin1 collation, for reference.

  Microbenchmark: call the collation's strnxfrm() handler directly for
  latin1_swedish_ci with MY_STRXFRM_PAD_TO_MAXLEN, so that the output is
  padded out to the full buffer. Setup happens with the benchmark timer
  stopped; only the transformation loop is timed. Afterwards both the
  returned length and the buffer contents are checked against recorded data.

  @param num_iterations  Number of times the transformation is executed.
*/
static void BM_Latin1_CI(size_t num_iterations) {
  // Keep the setup below out of the measurement.
  StopBenchmarkTiming();

  CHARSET_INFO *cs = init_collation("latin1_swedish_ci");

  const char *content =
      "Alla människor är födda fria och lika i värde "
      "och rättigheter. De är utrustade med förnuft och samvete och bör "
      "handla gentemot varandra i en anda av broderskap.";
  const int len = strlen(content);

  /*
    Just recorded from a trial run on the string above.
    The entire last row is padding.
  */
  static constexpr uchar expected[] = {
      0x41, 0x4c, 0x4c, 0x41, 0x20, 0x4d, 0x41, 0xa4, 0x4e, 0x4e, 0x49, 0x53,
      0x4b, 0x4f, 0x52, 0x20, 0x41, 0xa4, 0x52, 0x20, 0x46, 0x41, 0xb6, 0x44,
      0x44, 0x41, 0x20, 0x46, 0x52, 0x49, 0x41, 0x20, 0x4f, 0x43, 0x48, 0x20,
      0x4c, 0x49, 0x4b, 0x41, 0x20, 0x49, 0x20, 0x56, 0x41, 0xa4, 0x52, 0x44,
      0x45, 0x20, 0x4f, 0x43, 0x48, 0x20, 0x52, 0x41, 0xa4, 0x54, 0x54, 0x49,
      0x47, 0x48, 0x45, 0x54, 0x45, 0x52, 0x2e, 0x20, 0x44, 0x45, 0x20, 0x41,
      0xa4, 0x52, 0x20, 0x55, 0x54, 0x52, 0x55, 0x53, 0x54, 0x41, 0x44, 0x45,
      0x20, 0x4d, 0x45, 0x44, 0x20, 0x46, 0x41, 0xb6, 0x52, 0x4e, 0x55, 0x46,
      0x54, 0x20, 0x4f, 0x43, 0x48, 0x20, 0x53, 0x41, 0x4d, 0x56, 0x45, 0x54,
      0x45, 0x20, 0x4f, 0x43, 0x48, 0x20, 0x42, 0x41, 0xb6, 0x52, 0x20, 0x48,
      0x41, 0x4e, 0x44, 0x4c, 0x41, 0x20, 0x47, 0x45, 0x4e, 0x54, 0x45, 0x4d,
      0x4f, 0x54, 0x20, 0x56, 0x41, 0x52, 0x41, 0x4e, 0x44, 0x52, 0x41, 0x20,
      0x49, 0x20, 0x45, 0x4e, 0x20, 0x41, 0x4e, 0x44, 0x41, 0x20, 0x41, 0x56,
      0x20, 0x42, 0x52, 0x4f, 0x44, 0x45, 0x52, 0x53, 0x4b, 0x41, 0x50, 0x2e,
      0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
  };
  // The output buffer is exactly as large as the recorded weight string.
  uchar dest[sizeof(expected)];

  // Length reported by the last strnxfrm() call; checked after the loop.
  size_t ret = 0;
  // Timed region: nothing but the repeated transformation.
  StartBenchmarkTiming();
  for (size_t i = 0; i < num_iterations; ++i) {
    // sizeof(dest) is passed both as the destination size and as the fourth
    // argument (presumably the weight/character limit -- TODO confirm).
    ret = cs->coll->strnxfrm(cs, dest, sizeof(dest), sizeof(dest),
                             pointer_cast<const uchar *>(content), len,
                             MY_STRXFRM_PAD_TO_MAXLEN);
  }
  StopBenchmarkTiming();

  // The call must report having filled the whole buffer, padding included.
  EXPECT_EQ(sizeof(expected), ret);
  expect_arrays_equal(expected, dest, ret);

  // Report the total number of input bytes fed through the loop.
  SetBytesProcessed(num_iterations * strlen(content));
}
BENCHMARK(BM_Latin1_CI)

// The UCA collations are NO PAD, so strnncollsp must not ignore trailing
// spaces when comparing.
TEST(PadCollationTest, BasicTest) {
  constexpr char foo[] = "foo";
  constexpr char foosp[] = "foo    ";
  constexpr char bar[] = "bar";
  constexpr char foobar[] = "foobar";

  CHARSET_INFO *cs = init_collation("utf8mb4_0900_ai_ci");

  // Compares two NUL-terminated strings through the collation's
  // strnncollsp handler, using their full strlen() lengths.
  const auto collate = [cs](const char *lhs, const char *rhs) {
    return cs->coll->strnncollsp(cs, pointer_cast<const uchar *>(lhs),
                                 strlen(lhs), pointer_cast<const uchar *>(rhs),
                                 strlen(rhs));
  };

  // Forward direction.
  EXPECT_EQ(collate(foo, foo), 0);     // "foo" == "foo"
  EXPECT_LT(collate(foo, foosp), 0);   // "foo" < "foo    "
  EXPECT_GT(collate(foo, bar), 0);     // "foo" > "bar"
  EXPECT_LT(collate(foo, foobar), 0);  // "foo" < "foobar"

  // The same pairs with the operands swapped must give the opposite sign.
  EXPECT_GT(collate(foosp, foo), 0);   // "foo    " > "foo"
  EXPECT_LT(collate(bar, foo), 0);     // "bar" < "foo"
  EXPECT_GT(collate(foobar, foo), 0);  // "foobar" > "foo"
}

// Checks, by comparing strnxfrm output, that the 0900 UCA collations treat
// spaces as significant wherever they appear in the string.
TEST(StrxfrmTest, NoPadCollation) {
  CHARSET_INFO *ai_ci = init_collation("utf8mb4_0900_ai_ci");
  CHARSET_INFO *as_cs = init_collation("utf8mb4_0900_as_cs");
  CHARSET_INFO *as_ci = init_collation("utf8mb4_0900_as_ci");

  // Basic sanity checks.
  EXPECT_EQ(compare_through_strxfrm(ai_ci, "abc", "abc"), 0);
  EXPECT_NE(compare_through_strxfrm(as_ci, "abc", "Ǎḅç"), 0);
  for (CHARSET_INFO *cs : {ai_ci, as_ci}) {
    SCOPED_TRACE(cs->m_coll_name);
    EXPECT_NE(compare_through_strxfrm(cs, "abc", "def"), 0);
  }

  // Trailing spaces make the string sort later, no matter the collation.
  EXPECT_LT(compare_through_strxfrm(ai_ci, "abc", "abc  "), 0);
  EXPECT_LT(compare_through_strxfrm(as_ci, "abc", "Ǎḅç  "), 0);
  EXPECT_LT(compare_through_strxfrm(as_cs, "abc", "abc  "), 0);
  EXPECT_LT(compare_through_strxfrm(as_cs, "abc", "Abc  "), 0);

  // The same holds for other kinds of spaces.
  EXPECT_LT(compare_through_strxfrm(ai_ci, "abc", u8"abc \u00a0"), 0);

  // Non-breaking space vs. ordinary space, both at the end and in the middle
  // of the string: _equal_ in ai_ci and as_ci ...
  for (CHARSET_INFO *cs : {ai_ci, as_ci}) {
    SCOPED_TRACE(cs->m_coll_name);
    EXPECT_EQ(compare_through_strxfrm(cs, "abc ", u8"abc\u00a0"), 0);
    EXPECT_EQ(compare_through_strxfrm(cs, "a c", u8"a\u00a0c"), 0);
  }
  // ... but ordered _after_ the ordinary space in as_cs.
  EXPECT_LT(compare_through_strxfrm(as_cs, "abc ", u8"abc\u00a0"), 0);
  EXPECT_LT(compare_through_strxfrm(as_cs, "a c", u8"a\u00a0c"), 0);

  // Spaces in the middle of the string must not be stripped.
  for (CHARSET_INFO *cs : {ai_ci, as_ci, as_cs}) {
    SCOPED_TRACE(cs->m_coll_name);
    EXPECT_LT(compare_through_strxfrm(cs, "ab  c", "abc"), 0);
  }

  // Whitespace ordering as specified by DUCET.
  for (CHARSET_INFO *cs : {as_ci, as_cs}) {
    SCOPED_TRACE(cs->m_coll_name);
    EXPECT_GT(compare_through_strxfrm(cs, " ", "\t"), 0);
  }
  EXPECT_LT(compare_through_strxfrm(as_cs, "", "\t"), 0);
}

// Checks that the Hungarian collation handles the "cs" contraction when
// strings are compared through their strnxfrm output.
TEST(StrxfrmTest, Contractions) {
  CHARSET_INFO *hu_ai_ci = init_collation("utf8mb4_hu_0900_ai_ci");

  // Basic sanity checks.
  EXPECT_EQ(compare_through_strxfrm(hu_ai_ci, "abc", "abc"), 0);
  EXPECT_NE(compare_through_strxfrm(hu_ai_ci, "abc", "def"), 0);
  EXPECT_EQ(compare_through_strxfrm(hu_ai_ci, "abc", "Abc"), 0);

  // Pairs whose first member must sort strictly before the second.
  struct OrderedPair {
    const char *lesser;
    const char *greater;
  };
  const OrderedPair ascending[] = {
      // "cs" counts as a separate letter, where c < cs < d.
      {"c", "cs"},
      {"cs", "d"},
      {"ct", "cst"},
      {"cst", "dt"},
      // Wikipedia gives this as an example.
      {"cukor", "csak"},
  };
  for (const OrderedPair &entry : ascending) {
    SCOPED_TRACE(string(entry.lesser) + " < " + entry.greater);
    EXPECT_LT(compare_through_strxfrm(hu_ai_ci, entry.lesser, entry.greater),
              0);
  }
}

/*
  Exhaustively compares, for every combination of four byte values, a
  byte-by-byte range check (is any byte outside 0x20..0x7e?) against the
  branch-free check that works on all four bytes packed in one 32-bit word.

  Disabled by default: it takes ~10 seconds to run, even in optimized mode.
*/
TEST(BitfiddlingTest, DISABLED_FastOutOfRange) {
  // Reference check for a single byte value.
  const auto outside = [](int value) { return value < 0x20 || value > 0x7e; };

  unsigned char bytes[4];
  for (int b0 = 0; b0 < 256; ++b0) {
    bytes[0] = b0;
    for (int b1 = 0; b1 < 256; ++b1) {
      bytes[1] = b1;
      for (int b2 = 0; b2 < 256; ++b2) {
        bytes[2] = b2;
        for (int b3 = 0; b3 < 256; ++b3) {
          bytes[3] = b3;

          const bool any_out_of_range_slow =
              outside(b0) || outside(b1) || outside(b2) || outside(b3);

          // Pack the four bytes into one word, in memory order.
          uint32 four_bytes;
          memcpy(&four_bytes, bytes, sizeof(four_bytes));

          // Each expression leaves a bit set in a byte's top position when
          // the packed check flags that byte.
          const uint32 after_add = (four_bytes + 0x01010101u) & 0x80808080;
          const uint32 after_sub = (four_bytes - 0x20202020u) & 0x80808080;
          const bool any_out_of_range_fast =
              (after_add != 0) || (after_sub != 0);

          EXPECT_EQ(any_out_of_range_slow, any_out_of_range_fast);
        }
      }
    }
  }
}

/*
  The 16-bit analogue of FastOutOfRange: the same comparison between a
  byte-by-byte range check and the packed check, but over two bytes only,
  which makes the exhaustive loop much, much faster.
*/
TEST(BitfiddlingTest, FastOutOfRange16) {
  // Reference check for a single byte value.
  const auto outside = [](int value) { return value < 0x20 || value > 0x7e; };

  unsigned char bytes[2];
  for (int first = 0; first < 256; ++first) {
    bytes[0] = first;
    for (int second = 0; second < 256; ++second) {
      bytes[1] = second;

      const bool any_out_of_range_slow = outside(first) || outside(second);

      // Pack the two bytes into one word, in memory order.
      uint16 two_bytes;
      memcpy(&two_bytes, bytes, sizeof(two_bytes));

      // The operands are promoted to int for the arithmetic; the mask then
      // keeps only the top bit of each of the two low bytes.
      const int after_add = (two_bytes + uint16{0x0101}) & uint16{0x8080};
      const int after_sub = (two_bytes - uint16{0x2020}) & uint16{0x8080};
      const bool any_out_of_range_fast = (after_add != 0) || (after_sub != 0);

      EXPECT_EQ(any_out_of_range_slow, any_out_of_range_fast);
    }
  }
}

/*
  Hashes a NUL-terminated string with the collation's hash_sort handler,
  starting from the fixed seed values 1 and 4.

  @param cs   Collation whose hash_sort is used.
  @param str  NUL-terminated input string.

  @return The first of the two hash_sort accumulators after hashing.
*/
uint64 hash(CHARSET_INFO *cs, const char *str) {
  const uchar *bytes = pointer_cast<const uchar *>(str);
  const size_t length = strlen(str);

  uint64 acc1 = 1;
  uint64 acc2 = 4;
  cs->coll->hash_sort(cs, bytes, length, &acc1, &acc2);
  return acc1;
}

/*
  Checks that hash_sort agrees with the NO PAD comparison rules: strings that
  compare equal hash equal, strings that differ (including by spaces) hash
  differently.

  NOTE: Every "not equal" expectation in this test has an infinitesimal
  chance of failing by pure accident, since two different inputs may still
  happen to produce the same hash.
*/
TEST(PadCollationTest, HashSort) {
  CHARSET_INFO *ai_ci = init_collation("utf8mb4_0900_ai_ci");
  CHARSET_INFO *as_cs = init_collation("utf8mb4_0900_as_cs");

  // Shorthands for hashing a string under each of the two collations.
  const auto hash_ai_ci = [ai_ci](const char *s) { return hash(ai_ci, s); };
  const auto hash_as_cs = [as_cs](const char *s) { return hash(as_cs, s); };

  // Basic sanity checks.
  EXPECT_EQ(hash_ai_ci("abc"), hash_ai_ci("abc"));
  EXPECT_NE(hash_ai_ci("abc"), hash_ai_ci("def"));

  // Trailing spaces matter, no matter the collation.
  EXPECT_NE(hash_ai_ci("abc"), hash_ai_ci("abc  "));
  EXPECT_NE(hash_as_cs("abc"), hash_as_cs("abc  "));
  EXPECT_NE(hash_as_cs("abc"), hash_as_cs("Abc  "));

  // The same holds for other kinds of spaces.
  EXPECT_NE(hash_ai_ci("abc"), hash_ai_ci(u8"abc \u00a0"));

  // A non-breaking space hashes _equal_ to an ordinary space in ai_ci,
  // but _unequal_ in as_cs.
  EXPECT_EQ(hash_ai_ci("abc "), hash_ai_ci(u8"abc\u00a0"));
  EXPECT_NE(hash_as_cs("abc "), hash_as_cs(u8"abc\u00a0"));
  EXPECT_NE(hash_as_cs("abc"), hash_as_cs(u8"abc\u00a0"));

  // Likewise in the middle of the string.
  EXPECT_EQ(hash_ai_ci("a c"), hash_ai_ci(u8"a\u00a0c"));
  EXPECT_NE(hash_as_cs("a c"), hash_as_cs(u8"a\u00a0c"));

  // Spaces in the middle of the string must not be stripped.
  EXPECT_NE(hash_ai_ci("ab  c"), hash_ai_ci("abc"));
  EXPECT_NE(hash_as_cs("ab  c"), hash_as_cs("abc"));
}

// Checks that hash_sort accepts a null pointer for a zero-length input.
TEST(HashTest, NullPointer) {
  CHARSET_INFO *cs = init_collation("utf8mb4_0900_ai_ci");
  uint64 acc1 = 1;
  uint64 acc2 = 4;

  /*
    Hashing zero bytes must give the same result as hashing the empty
    string, regardless of what the data pointer is.
  */
  cs->coll->hash_sort(cs, nullptr, 0, &acc1, &acc2);
  EXPECT_EQ(acc1, hash(cs, ""));

  // Continue from the accumulators left by the call above. The resulting
  // values are irrelevant; all that matters is that this does not crash.
  constexpr char eight_spaces[] = "        ";
  cs->coll->hash_sort(cs, pointer_cast<const uchar *>(eight_spaces),
                      sizeof(eight_spaces) - 1, &acc1, &acc2);
}

namespace {

/*
  Test that strnxfrmlen() holds for all single characters.

  Every code point in [0, 0x10ffff] that the character set can encode is
  transformed on its own, and the length of its weight string is checked
  against cs->coll->strnxfrmlen(cs, cs->mbmaxlen). Characters that exceed
  that bound are dumped to stderr together with their weight string, and the
  character with the longest weight string is reported at the end.

  @param cs  Collation to check.
*/
void test_strnxfrmlen(CHARSET_INFO *cs) {
  pair<size_t, my_wc_t> longest{0, 0};

  uchar inbuf[16], outbuf[256];  // Ought to be enough for anyone.
  const size_t max_len = cs->coll->strnxfrmlen(cs, cs->mbmaxlen);

  for (my_wc_t ch = 0; ch <= 0x10ffff; ++ch) {
    /*
      wc_mb() returns an int that is zero or negative when the character
      cannot be encoded. The result must stay signed for the check below:
      stored in a size_t, a negative error code would turn into a huge
      positive length and be handed on to strnxfrm().
    */
    const int in_len = cs->cset->wc_mb(cs, ch, inbuf, inbuf + sizeof(inbuf));
    if (in_len <= 0) {
      continue;  // Not representable in this character set.
    }
    const size_t out_len =
        cs->coll->strnxfrm(cs, outbuf, sizeof(outbuf), 1, inbuf,
                           static_cast<size_t>(in_len), 0);
    EXPECT_LE(out_len, max_len);
    if (out_len > max_len) {
      // Give some context for the failed expectation above.
      fprintf(stderr, "U+%04lX needed more room than strnxfrmlen() claimed\n",
              ch);
      fprintf(stderr, "Weight string:");
      for (size_t i = 0; i < out_len; ++i) {
        fprintf(stderr, " %02x", outbuf[i]);
      }
      fprintf(stderr, "\n\n");
    }

    // Pairs compare by length first, then by code point.
    longest = max(longest, make_pair(out_len, ch));
  }

  fprintf(stderr,
          "Longest character in '%s': U+%04lX, %d bytes (strnxfrm_len=%d)\n",
          cs->m_coll_name, longest.second, static_cast<int>(longest.first),
          static_cast<int>(max_len));
}

}  // namespace

// Runs test_strnxfrmlen() on every collation that is marked as available.
TEST(StrxfrmLenTest, StrnxfrmLenIsLongEnoughForAllCharacters) {
  // Load one collation to get everything going.
  init_collation("utf8mb4_0900_ai_ci");

  for (CHARSET_INFO *candidate : all_charsets) {
    // Skip empty slots and collations that are not available.
    if (candidate == nullptr) {
      continue;
    }
    if ((candidate->state & MY_CS_AVAILABLE) == 0) {
      continue;
    }

    // Tag any failure with the name of the collation being checked.
    SCOPED_TRACE(candidate->m_coll_name);
    test_strnxfrmlen(init_collation(candidate->m_coll_name));
  }
}

// Golden hashes for a test string. These may be stored on disk, so we need to
// make sure that they never change.
struct GoldenHashResult {
  // The recorded pair of 64-bit hash values; presumably the two accumulators
  // produced by hash_sort -- confirm against the test that fills this in.
  pair<uint64, uint64> hash_value;
};

// Checks that hash_sort() keeps producing the recorded golden values.
//
// For every available collation, a fixed ASCII-only test string is converted
// into the collation's character set and hashed with hash_sort(), starting
// from nr1 = 4 and nr2 = 1. The resulting (nr1, nr2) pair must match the
// reference value stored in the table below.
TEST(StrmxfrmHashTest, HashStability) {
  // Load one collation to get everything going.
  init_collation("utf8mb4_0900_ai_ci");

  // Reference values. Please keep this list sorted.
  const unordered_map<string, GoldenHashResult> expected = {
      {"armscii8_bin", {{0xb6240d9a0a0f7efcLL, 0x000002b0LL}}},
      {"armscii8_general_ci", {{0xdae43ea5cabac97cLL, 0x000002b0LL}}},
      {"ascii_bin", {{0xb6240d9a0a0f7efcLL, 0x000002b0LL}}},
      {"ascii_general_ci", {{0xdae43ea5cabac97cLL, 0x000002b0LL}}},
      {"big5_bin", {{0xb6240d9a0a0f7efcLL, 0x000002b0LL}}},
      {"big5_chinese_ci", {{0xdae43ea5cabac97cLL, 0x000002b0LL}}},
      {"binary", {{0xb6240d9a0a0f7efcLL, 0x000002b0LL}}},
      {"cp1250_bin", {{0xb6240d9a0a0f7efcLL, 0x000002b0LL}}},
      {"cp1250_croatian_ci", {{0xe25aa32298f78f4aLL, 0x000002b0LL}}},
      {"cp1250_czech_cs", {{0xb6240d9a0a0f7efcLL, 0x000002b0LL}}},
      {"cp1250_general_ci", {{0x81c46f6c6b06f8fcLL, 0x000002b0LL}}},
      {"cp1250_polish_ci", {{0xe25aa32298f78f4aLL, 0x000002b0LL}}},
      {"cp1251_bin", {{0xb6240d9a0a0f7efcLL, 0x000002b0LL}}},
      {"cp1251_bulgarian_ci", {{0xdae43ea5cabac97cLL, 0x000002b0LL}}},
      {"cp1251_general_ci", {{0xce71da5364c300a4LL, 0x000002b0LL}}},
      {"cp1251_general_cs", {{0xff44ce45c6d3d142LL, 0x000002b0LL}}},
      {"cp1251_ukrainian_ci", {{0xdae43ea5cabac97cLL, 0x000002b0LL}}},
      {"cp1256_bin", {{0xb6240d9a0a0f7efcLL, 0x000002b0LL}}},
      {"cp1256_general_ci", {{0x44ed84e7ad4a6c1cLL, 0x000002b0LL}}},
      {"cp1257_bin", {{0xb6240d9a0a0f7efcLL, 0x000002b0LL}}},
      {"cp1257_general_ci", {{0x15219f243a38ad58LL, 0x000002b0LL}}},
      {"cp1257_lithuanian_ci", {{0xaa3ef638e5e056e8LL, 0x000002b0LL}}},
      {"cp850_bin", {{0xb6240d9a0a0f7efcLL, 0x000002b0LL}}},
      {"cp850_general_ci", {{0xf32b1cf4087a0b08LL, 0x000002b0LL}}},
      {"cp852_bin", {{0xb6240d9a0a0f7efcLL, 0x000002b0LL}}},
      {"cp852_general_ci", {{0x60dce9bffdeccd52LL, 0x000002b0LL}}},
      {"cp866_bin", {{0xb6240d9a0a0f7efcLL, 0x000002b0LL}}},
      {"cp866_general_ci", {{0xce71da5364c300a4LL, 0x000002b0LL}}},
      {"cp932_bin", {{0xb6240d9a0a0f7efcLL, 0x000002b0LL}}},
      {"cp932_japanese_ci", {{0xdae43ea5cabac97cLL, 0x000002b0LL}}},
      {"dec8_bin", {{0xb6240d9a0a0f7efcLL, 0x000002b0LL}}},
      {"dec8_swedish_ci", {{0xdae43ea5cabac97cLL, 0x000002b0LL}}},
      {"eucjpms_bin", {{0xb6240d9a0a0f7efcLL, 0x000002b0LL}}},
      {"eucjpms_japanese_ci", {{0xdae43ea5cabac97cLL, 0x000002b0LL}}},
      {"euckr_bin", {{0xb6240d9a0a0f7efcLL, 0x000002b0LL}}},
      {"euckr_korean_ci", {{0xdae43ea5cabac97cLL, 0x000002b0LL}}},
      {"gb18030_bin", {{0xb6240d9a0a0f7efcLL, 0x000002b0LL}}},
      {"gb18030_chinese_ci", {{0xb7b6676124243e73LL, 0x00000abdLL}}},
      {"gb18030_unicode_520_ci", {{0x5c1f019a21e3d464LL, 0x0000055fLL}}},
      {"gb2312_bin", {{0xb6240d9a0a0f7efcLL, 0x000002b0LL}}},
      {"gb2312_chinese_ci", {{0xdae43ea5cabac97cLL, 0x000002b0LL}}},
      {"gbk_bin", {{0xb6240d9a0a0f7efcLL, 0x000002b0LL}}},
      {"gbk_chinese_ci", {{0xdae43ea5cabac97cLL, 0x000002b0LL}}},
      {"geostd8_bin", {{0xb6240d9a0a0f7efcLL, 0x000002b0LL}}},
      {"geostd8_general_ci", {{0xdae43ea5cabac97cLL, 0x000002b0LL}}},
      {"greek_bin", {{0xb6240d9a0a0f7efcLL, 0x000002b0LL}}},
      {"greek_general_ci", {{0xdae43ea5cabac97cLL, 0x000002b0LL}}},
      {"hebrew_bin", {{0xb6240d9a0a0f7efcLL, 0x000002b0LL}}},
      {"hebrew_general_ci", {{0xdae43ea5cabac97cLL, 0x000002b0LL}}},
      {"hp8_bin", {{0xb6240d9a0a0f7efcLL, 0x000002b0LL}}},
      {"hp8_english_ci", {{0xdae43ea5cabac97cLL, 0x000002b0LL}}},
      {"keybcs2_bin", {{0xb6240d9a0a0f7efcLL, 0x000002b0LL}}},
      {"keybcs2_general_ci", {{0xd2d54c0201229650LL, 0x000002b0LL}}},
      {"koi8r_bin", {{0xb6240d9a0a0f7efcLL, 0x000002b0LL}}},
      {"koi8r_general_ci", {{0xdae43ea5cabac97cLL, 0x000002b0LL}}},
      {"koi8u_bin", {{0xb6240d9a0a0f7efcLL, 0x000002b0LL}}},
      {"koi8u_general_ci", {{0xdae43ea5cabac97cLL, 0x000002b0LL}}},
      {"latin1_bin", {{0xb6240d9a0a0f7efcLL, 0x000002b0LL}}},
      {"latin1_danish_ci", {{0xdae43ea5cabac97cLL, 0x000002b0LL}}},
      {"latin1_general_ci", {{0xd7d424d55cb8f402LL, 0x000002b0LL}}},
      {"latin1_general_cs", {{0x96b2a3f94ffe41f9LL, 0x000002b0LL}}},
      {"latin1_german1_ci", {{0xdae43ea5cabac97cLL, 0x000002b0LL}}},
      {"latin1_german2_ci", {{0xdae43ea5cabac97cLL, 0x000002b0LL}}},
      {"latin1_spanish_ci", {{0xd7d424d55cb8f402LL, 0x000002b0LL}}},
      {"latin1_swedish_ci", {{0xdae43ea5cabac97cLL, 0x000002b0LL}}},
      {"latin2_bin", {{0xb6240d9a0a0f7efcLL, 0x000002b0LL}}},
      {"latin2_croatian_ci", {{0xe25aa32298f78f4aLL, 0x000002b0LL}}},
      {"latin2_czech_cs", {{0xba89a4855c3a88b6LL, 0x000002b0LL}}},
      {"latin2_general_ci", {{0xd9179195a5ddebf8LL, 0x000002b0LL}}},
      {"latin2_hungarian_ci", {{0xba89a4855c3a88b6LL, 0x000002b0LL}}},
      {"latin5_bin", {{0xb6240d9a0a0f7efcLL, 0x000002b0LL}}},
      {"latin5_turkish_ci", {{0x68989a162aab9f1cLL, 0x000002b0LL}}},
      {"latin7_bin", {{0xb6240d9a0a0f7efcLL, 0x000002b0LL}}},
      {"latin7_estonian_cs", {{0xa281f3df87b89fe1LL, 0x000002b0LL}}},
      {"latin7_general_ci", {{0xc6808727382ffb41LL, 0x000002b0LL}}},
      {"latin7_general_cs", {{0xf70d2b9f0d640804LL, 0x000002b0LL}}},
      {"macce_bin", {{0xb6240d9a0a0f7efcLL, 0x000002b0LL}}},
      {"macce_general_ci", {{0xb27ca521eb9b7492LL, 0x000002b0LL}}},
      {"macroman_bin", {{0xb6240d9a0a0f7efcLL, 0x000002b0LL}}},
      {"macroman_general_ci", {{0x3254bac0fa3625efLL, 0x000002b0LL}}},
      {"sjis_bin", {{0xb6240d9a0a0f7efcLL, 0x000002b0LL}}},
      {"sjis_japanese_ci", {{0xdae43ea5cabac97cLL, 0x000002b0LL}}},
      {"swe7_bin", {{0xb6240d9a0a0f7efcLL, 0x000002b0LL}}},
      {"swe7_swedish_ci", {{0xdae43ea5cabac97cLL, 0x000002b0LL}}},
      {"tis620_bin", {{0xb6240d9a0a0f7efcLL, 0x000002b0LL}}},
      {"tis620_thai_ci", {{0xdae43ea5cabac97cLL, 0x000002b0LL}}},
      {"ucs2_bin", {{0x1877f0a25b18b4c6LL, 0x0000055fLL}}},
      {"ucs2_croatian_ci", {{0x3acdfaa93364f55cLL, 0x0000055fLL}}},
      {"ucs2_czech_ci", {{0x1dc65c2738ed47c0LL, 0x00000553LL}}},
      {"ucs2_danish_ci", {{0x3acdfaa93364f55cLL, 0x0000055fLL}}},
      {"ucs2_esperanto_ci", {{0x3acdfaa93364f55cLL, 0x0000055fLL}}},
      {"ucs2_estonian_ci", {{0x3acdfaa93364f55cLL, 0x0000055fLL}}},
      {"ucs2_general_ci", {{0xfb66c3f2301bd579LL, 0x0000055fLL}}},
      {"ucs2_general_mysql500_ci", {{0xfb66c3f2301bd579LL, 0x0000055fLL}}},
      {"ucs2_german2_ci", {{0x3acdfaa93364f55cLL, 0x0000055fLL}}},
      {"ucs2_hungarian_ci", {{0x3acdfaa93364f55cLL, 0x0000055fLL}}},
      {"ucs2_icelandic_ci", {{0x3acdfaa93364f55cLL, 0x0000055fLL}}},
      {"ucs2_latvian_ci", {{0x6473871765c3455cLL, 0x0000055fLL}}},
      {"ucs2_lithuanian_ci", {{0xccb8395ef1969f40LL, 0x00000553LL}}},
      {"ucs2_persian_ci", {{0x3acdfaa93364f55cLL, 0x0000055fLL}}},
      {"ucs2_polish_ci", {{0x3acdfaa93364f55cLL, 0x0000055fLL}}},
      {"ucs2_roman_ci", {{0xf40d4b3c957fccdcLL, 0x0000055fLL}}},
      {"ucs2_romanian_ci", {{0x3acdfaa93364f55cLL, 0x0000055fLL}}},
      {"ucs2_sinhala_ci", {{0x3acdfaa93364f55cLL, 0x0000055fLL}}},
      {"ucs2_slovak_ci", {{0x1dc65c2738ed47c0LL, 0x00000553LL}}},
      {"ucs2_slovenian_ci", {{0x3acdfaa93364f55cLL, 0x0000055fLL}}},
      {"ucs2_spanish2_ci", {{0x3e79d9277da1beb4LL, 0x00000547LL}}},
      {"ucs2_spanish_ci", {{0x3acdfaa93364f55cLL, 0x0000055fLL}}},
      {"ucs2_swedish_ci", {{0x3acdfaa93364f55cLL, 0x0000055fLL}}},
      {"ucs2_turkish_ci", {{0x3fb28acb6e515c9cLL, 0x0000055fLL}}},
      {"ucs2_unicode_520_ci", {{0x5c1f019a21e3d464LL, 0x0000055fLL}}},
      {"ucs2_unicode_ci", {{0x3acdfaa93364f55cLL, 0x0000055fLL}}},
      {"ucs2_vietnamese_ci", {{0x3acdfaa93364f55cLL, 0x0000055fLL}}},
      {"ujis_bin", {{0xb6240d9a0a0f7efcLL, 0x000002b0LL}}},
      {"ujis_japanese_ci", {{0xdae43ea5cabac97cLL, 0x000002b0LL}}},
      {"utf16_bin", {{0x1877f0a25b18b4c6LL, 0x0000055fLL}}},
      {"utf16_croatian_ci", {{0x3acdfaa93364f55cLL, 0x0000055fLL}}},
      {"utf16_czech_ci", {{0x1dc65c2738ed47c0LL, 0x00000553LL}}},
      {"utf16_danish_ci", {{0x3acdfaa93364f55cLL, 0x0000055fLL}}},
      {"utf16_esperanto_ci", {{0x3acdfaa93364f55cLL, 0x0000055fLL}}},
      {"utf16_estonian_ci", {{0x3acdfaa93364f55cLL, 0x0000055fLL}}},
      {"utf16_general_ci", {{0xfb66c3f2301bd579LL, 0x0000055fLL}}},
      {"utf16_german2_ci", {{0x3acdfaa93364f55cLL, 0x0000055fLL}}},
      {"utf16_hungarian_ci", {{0x3acdfaa93364f55cLL, 0x0000055fLL}}},
      {"utf16_icelandic_ci", {{0x3acdfaa93364f55cLL, 0x0000055fLL}}},
      {"utf16_latvian_ci", {{0x6473871765c3455cLL, 0x0000055fLL}}},
      {"utf16_lithuanian_ci", {{0xccb8395ef1969f40LL, 0x00000553LL}}},
      {"utf16_persian_ci", {{0x3acdfaa93364f55cLL, 0x0000055fLL}}},
      {"utf16_polish_ci", {{0x3acdfaa93364f55cLL, 0x0000055fLL}}},
      {"utf16_roman_ci", {{0xf40d4b3c957fccdcLL, 0x0000055fLL}}},
      {"utf16_romanian_ci", {{0x3acdfaa93364f55cLL, 0x0000055fLL}}},
      {"utf16_sinhala_ci", {{0x3acdfaa93364f55cLL, 0x0000055fLL}}},
      {"utf16_slovak_ci", {{0x1dc65c2738ed47c0LL, 0x00000553LL}}},
      {"utf16_slovenian_ci", {{0x3acdfaa93364f55cLL, 0x0000055fLL}}},
      {"utf16_spanish2_ci", {{0x3e79d9277da1beb4LL, 0x00000547LL}}},
      {"utf16_spanish_ci", {{0x3acdfaa93364f55cLL, 0x0000055fLL}}},
      {"utf16_swedish_ci", {{0x3acdfaa93364f55cLL, 0x0000055fLL}}},
      {"utf16_turkish_ci", {{0x3fb28acb6e515c9cLL, 0x0000055fLL}}},
      {"utf16_unicode_520_ci", {{0x5c1f019a21e3d464LL, 0x0000055fLL}}},
      {"utf16_unicode_ci", {{0x3acdfaa93364f55cLL, 0x0000055fLL}}},
      {"utf16_vietnamese_ci", {{0x3acdfaa93364f55cLL, 0x0000055fLL}}},
      {"utf16le_bin", {{0x3da26ce08ecbfaf9LL, 0x0000055fLL}}},
      {"utf16le_general_ci", {{0xfb66c3f2301bd579LL, 0x0000055fLL}}},
      {"utf32_bin", {{0x353330032692faLL, 0x00000abdLL}}},
      {"utf32_croatian_ci", {{0x3acdfaa93364f55cLL, 0x0000055fLL}}},
      {"utf32_czech_ci", {{0x1dc65c2738ed47c0LL, 0x00000553LL}}},
      {"utf32_danish_ci", {{0x3acdfaa93364f55cLL, 0x0000055fLL}}},
      {"utf32_esperanto_ci", {{0x3acdfaa93364f55cLL, 0x0000055fLL}}},
      {"utf32_estonian_ci", {{0x3acdfaa93364f55cLL, 0x0000055fLL}}},
      {"utf32_general_ci", {{0x353330032692faLL, 0x00000abdLL}}},
      {"utf32_german2_ci", {{0x3acdfaa93364f55cLL, 0x0000055fLL}}},
      {"utf32_hungarian_ci", {{0x3acdfaa93364f55cLL, 0x0000055fLL}}},
      {"utf32_icelandic_ci", {{0x3acdfaa93364f55cLL, 0x0000055fLL}}},
      {"utf32_latvian_ci", {{0x6473871765c3455cLL, 0x0000055fLL}}},
      {"utf32_lithuanian_ci", {{0xccb8395ef1969f40LL, 0x00000553LL}}},
      {"utf32_persian_ci", {{0x3acdfaa93364f55cLL, 0x0000055fLL}}},
      {"utf32_polish_ci", {{0x3acdfaa93364f55cLL, 0x0000055fLL}}},
      {"utf32_roman_ci", {{0xf40d4b3c957fccdcLL, 0x0000055fLL}}},
      {"utf32_romanian_ci", {{0x3acdfaa93364f55cLL, 0x0000055fLL}}},
      {"utf32_sinhala_ci", {{0x3acdfaa93364f55cLL, 0x0000055fLL}}},
      {"utf32_slovak_ci", {{0x1dc65c2738ed47c0LL, 0x00000553LL}}},
      {"utf32_slovenian_ci", {{0x3acdfaa93364f55cLL, 0x0000055fLL}}},
      {"utf32_spanish2_ci", {{0x3e79d9277da1beb4LL, 0x00000547LL}}},
      {"utf32_spanish_ci", {{0x3acdfaa93364f55cLL, 0x0000055fLL}}},
      {"utf32_swedish_ci", {{0x3acdfaa93364f55cLL, 0x0000055fLL}}},
      {"utf32_turkish_ci", {{0x3fb28acb6e515c9cLL, 0x0000055fLL}}},
      {"utf32_unicode_520_ci", {{0x5c1f019a21e3d464LL, 0x0000055fLL}}},
      {"utf32_unicode_ci", {{0x3acdfaa93364f55cLL, 0x0000055fLL}}},
      {"utf32_vietnamese_ci", {{0x3acdfaa93364f55cLL, 0x0000055fLL}}},
      {"utf8mb3_bin", {{0xb6240d9a0a0f7efcLL, 0x000002b0LL}}},
      {"utf8mb3_croatian_ci", {{0x3acdfaa93364f55cLL, 0x0000055fLL}}},
      {"utf8mb3_czech_ci", {{0x1dc65c2738ed47c0LL, 0x00000553LL}}},
      {"utf8mb3_danish_ci", {{0x3acdfaa93364f55cLL, 0x0000055fLL}}},
      {"utf8mb3_esperanto_ci", {{0x3acdfaa93364f55cLL, 0x0000055fLL}}},
      {"utf8mb3_estonian_ci", {{0x3acdfaa93364f55cLL, 0x0000055fLL}}},
      {"utf8mb3_general_ci", {{0xfb66c3f2301bd579LL, 0x0000055fLL}}},
      {"utf8mb3_general_mysql500_ci", {{0xfb66c3f2301bd579LL, 0x0000055fLL}}},
      {"utf8mb3_german2_ci", {{0x3acdfaa93364f55cLL, 0x0000055fLL}}},
      {"utf8mb3_hungarian_ci", {{0x3acdfaa93364f55cLL, 0x0000055fLL}}},
      {"utf8mb3_icelandic_ci", {{0x3acdfaa93364f55cLL, 0x0000055fLL}}},
      {"utf8mb3_latvian_ci", {{0x6473871765c3455cLL, 0x0000055fLL}}},
      {"utf8mb3_lithuanian_ci", {{0xccb8395ef1969f40LL, 0x00000553LL}}},
      {"utf8mb3_persian_ci", {{0x3acdfaa93364f55cLL, 0x0000055fLL}}},
      {"utf8mb3_polish_ci", {{0x3acdfaa93364f55cLL, 0x0000055fLL}}},
      {"utf8mb3_roman_ci", {{0xf40d4b3c957fccdcLL, 0x0000055fLL}}},
      {"utf8mb3_romanian_ci", {{0x3acdfaa93364f55cLL, 0x0000055fLL}}},
      {"utf8mb3_sinhala_ci", {{0x3acdfaa93364f55cLL, 0x0000055fLL}}},
      {"utf8mb3_slovak_ci", {{0x1dc65c2738ed47c0LL, 0x00000553LL}}},
      {"utf8mb3_slovenian_ci", {{0x3acdfaa93364f55cLL, 0x0000055fLL}}},
      {"utf8mb3_spanish2_ci", {{0x3e79d9277da1beb4LL, 0x00000547LL}}},
      {"utf8mb3_spanish_ci", {{0x3acdfaa93364f55cLL, 0x0000055fLL}}},
      {"utf8mb3_swedish_ci", {{0x3acdfaa93364f55cLL, 0x0000055fLL}}},
      {"utf8mb3_tolower_ci", {{0x8eab9a2c403c8eb9LL, 0x0000055fLL}}},
      {"utf8mb3_turkish_ci", {{0x3fb28acb6e515c9cLL, 0x0000055fLL}}},
      {"utf8mb3_unicode_520_ci", {{0x5c1f019a21e3d464LL, 0x0000055fLL}}},
      {"utf8mb3_unicode_ci", {{0x3acdfaa93364f55cLL, 0x0000055fLL}}},
      {"utf8mb3_vietnamese_ci", {{0x3acdfaa93364f55cLL, 0x0000055fLL}}},
      {"utf8mb4_0900_ai_ci", {{0x3329a425d0f7f8d3LL, 0x00000001LL}}},
      {"utf8mb4_0900_as_ci", {{0xfc978781d49d0d9bLL, 0x00000001LL}}},
      {"utf8mb4_0900_as_cs", {{0xcfb3e3073c9f5a19LL, 0x00000001LL}}},
      {"utf8mb4_0900_bin", {{0xb6240d9a0a0f7efcLL, 0x000002b0LL}}},
      {"utf8mb4_bg_0900_ai_ci", {{0xb55bc2bf5ab2bf53LL, 0x00000001LL}}},
      {"utf8mb4_bg_0900_as_cs", {{0x36f5a31292841899LL, 0x00000001LL}}},
      {"utf8mb4_bin", {{0xb6240d9a0a0f7efcLL, 0x000002b0LL}}},
      {"utf8mb4_bs_0900_ai_ci", {{0x3329a425d0f7f8d3LL, 0x00000001LL}}},
      {"utf8mb4_bs_0900_as_cs", {{0xcfb3e3073c9f5a19LL, 0x00000001LL}}},
      {"utf8mb4_croatian_ci", {{0x3acdfaa93364f55cLL, 0x0000055fLL}}},
      {"utf8mb4_cs_0900_ai_ci", {{0x36582be4fafa0bbbLL, 0x00000001LL}}},
      {"utf8mb4_cs_0900_as_cs", {{0xac403419684d8c71LL, 0x00000001LL}}},
      {"utf8mb4_czech_ci", {{0x1dc65c2738ed47c0LL, 0x00000553LL}}},
      {"utf8mb4_da_0900_ai_ci", {{0x3329a425d0f7f8d3LL, 0x00000001LL}}},
      {"utf8mb4_da_0900_as_cs", {{0xbd24fdcb7b0cf519LL, 0x00000001LL}}},
      {"utf8mb4_danish_ci", {{0x3acdfaa93364f55cLL, 0x0000055fLL}}},
      {"utf8mb4_de_pb_0900_ai_ci", {{0x3329a425d0f7f8d3LL, 0x00000001LL}}},
      {"utf8mb4_de_pb_0900_as_cs", {{0xcfb3e3073c9f5a19LL, 0x00000001LL}}},
      {"utf8mb4_eo_0900_ai_ci", {{0x3329a425d0f7f8d3LL, 0x00000001LL}}},
      {"utf8mb4_eo_0900_as_cs", {{0xcfb3e3073c9f5a19LL, 0x00000001LL}}},
      {"utf8mb4_es_0900_ai_ci", {{0x3329a425d0f7f8d3LL, 0x00000001LL}}},
      {"utf8mb4_es_0900_as_cs", {{0xcfb3e3073c9f5a19LL, 0x00000001LL}}},
      {"utf8mb4_es_trad_0900_ai_ci", {{0x555a77b8a263f17fLL, 0x00000001LL}}},
      {"utf8mb4_es_trad_0900_as_cs", {{0xae993a138c5c030dLL, 0x00000001LL}}},
      {"utf8mb4_esperanto_ci", {{0x3acdfaa93364f55cLL, 0x0000055fLL}}},
      {"utf8mb4_estonian_ci", {{0x3acdfaa93364f55cLL, 0x0000055fLL}}},
      {"utf8mb4_et_0900_ai_ci", {{0x3329a425d0f7f8d3LL, 0x00000001LL}}},
      {"utf8mb4_et_0900_as_cs", {{0xcfb3e3073c9f5a19LL, 0x00000001LL}}},
      {"utf8mb4_general_ci", {{0xfb66c3f2301bd579LL, 0x0000055fLL}}},
      {"utf8mb4_german2_ci", {{0x3acdfaa93364f55cLL, 0x0000055fLL}}},
      {"utf8mb4_gl_0900_ai_ci", {{0x3329a425d0f7f8d3LL, 0x00000001LL}}},
      {"utf8mb4_gl_0900_as_cs", {{0xcfb3e3073c9f5a19LL, 0x00000001LL}}},
      {"utf8mb4_hr_0900_ai_ci", {{0x3329a425d0f7f8d3LL, 0x00000001LL}}},
      {"utf8mb4_hr_0900_as_cs", {{0xcfb3e3073c9f5a19LL, 0x00000001LL}}},
      {"utf8mb4_hu_0900_ai_ci", {{0x3162e9e9cebb9148LL, 0x00000001LL}}},
      {"utf8mb4_hu_0900_as_cs", {{0x88842661c548eec1LL, 0x00000001LL}}},
      {"utf8mb4_hungarian_ci", {{0x3acdfaa93364f55cLL, 0x0000055fLL}}},
      {"utf8mb4_icelandic_ci", {{0x3acdfaa93364f55cLL, 0x0000055fLL}}},
      {"utf8mb4_is_0900_ai_ci", {{0x3329a425d0f7f8d3LL, 0x00000001LL}}},
      {"utf8mb4_is_0900_as_cs", {{0xcfb3e3073c9f5a19LL, 0x00000001LL}}},
      {"utf8mb4_ja_0900_as_cs", {{0xcfb3e3073c9f5a19LL, 0x00000001LL}}},
      {"utf8mb4_ja_0900_as_cs_ks", {{0xcfb3e3073c9f5a19LL, 0x00000001LL}}},
      {"utf8mb4_la_0900_ai_ci", {{0x2928cd07bca9a85dLL, 0x00000001LL}}},
      {"utf8mb4_la_0900_as_cs", {{0x29a7f3eb43a9819LL, 0x00000001LL}}},
      {"utf8mb4_latvian_ci", {{0x6473871765c3455cLL, 0x0000055fLL}}},
      {"utf8mb4_lithuanian_ci", {{0xccb8395ef1969f40LL, 0x00000553LL}}},
      {"utf8mb4_lt_0900_ai_ci", {{0xcd5ce469f67f6792LL, 0x00000001LL}}},
      {"utf8mb4_lt_0900_as_cs", {{0xe2e6dc41a4d6b3c1LL, 0x00000001LL}}},
      {"utf8mb4_lv_0900_ai_ci", {{0xcd5ce469f67f6792LL, 0x00000001LL}}},
      {"utf8mb4_lv_0900_as_cs", {{0xfe377cec9551f0f4LL, 0x00000001LL}}},
      {"utf8mb4_mn_cyrl_0900_ai_ci", {{0xb55bc2bf5ab2bf53LL, 0x00000001LL}}},
      {"utf8mb4_mn_cyrl_0900_as_cs", {{0x36f5a31292841899LL, 0x00000001LL}}},
      {"utf8mb4_nb_0900_ai_ci", {{0x3329a425d0f7f8d3LL, 0x00000001LL}}},
      {"utf8mb4_nb_0900_as_cs", {{0xCFB3E3073C9F5A19LL, 0x00000001LL}}},
      {"utf8mb4_nn_0900_ai_ci", {{0x3329a425d0f7f8d3LL, 0x00000001LL}}},
      {"utf8mb4_nn_0900_as_cs", {{0xCFB3E3073C9F5A19LL, 0x00000001LL}}},
      {"utf8mb4_persian_ci", {{0x3acdfaa93364f55cLL, 0x0000055fLL}}},
      {"utf8mb4_pl_0900_ai_ci", {{0x3329a425d0f7f8d3LL, 0x00000001LL}}},
      {"utf8mb4_pl_0900_as_cs", {{0xcfb3e3073c9f5a19LL, 0x00000001LL}}},
      {"utf8mb4_polish_ci", {{0x3acdfaa93364f55cLL, 0x0000055fLL}}},
      {"utf8mb4_ro_0900_ai_ci", {{0x3329a425d0f7f8d3LL, 0x00000001LL}}},
      {"utf8mb4_ro_0900_as_cs", {{0xcfb3e3073c9f5a19LL, 0x00000001LL}}},
      {"utf8mb4_roman_ci", {{0xf40d4b3c957fccdcLL, 0x0000055fLL}}},
      {"utf8mb4_romanian_ci", {{0x3acdfaa93364f55cLL, 0x0000055fLL}}},
      {"utf8mb4_ru_0900_ai_ci", {{0xb55bc2bf5ab2bf53LL, 0x00000001LL}}},
      {"utf8mb4_ru_0900_as_cs", {{0x36f5a31292841899LL, 0x00000001LL}}},
      {"utf8mb4_sinhala_ci", {{0x3acdfaa93364f55cLL, 0x0000055fLL}}},
      {"utf8mb4_sk_0900_ai_ci", {{0x36582be4fafa0bbbLL, 0x00000001LL}}},
      {"utf8mb4_sk_0900_as_cs", {{0xac403419684d8c71LL, 0x00000001LL}}},
      {"utf8mb4_sl_0900_ai_ci", {{0x3329a425d0f7f8d3LL, 0x00000001LL}}},
      {"utf8mb4_sl_0900_as_cs", {{0xcfb3e3073c9f5a19LL, 0x00000001LL}}},
      {"utf8mb4_slovak_ci", {{0x1dc65c2738ed47c0LL, 0x00000553LL}}},
      {"utf8mb4_slovenian_ci", {{0x3acdfaa93364f55cLL, 0x0000055fLL}}},
      {"utf8mb4_spanish2_ci", {{0x3e79d9277da1beb4LL, 0x00000547LL}}},
      {"utf8mb4_spanish_ci", {{0x3acdfaa93364f55cLL, 0x0000055fLL}}},
      {"utf8mb4_sr_latn_0900_ai_ci", {{0x3329a425d0f7f8d3LL, 0x00000001LL}}},
      {"utf8mb4_sr_latn_0900_as_cs", {{0xcfb3e3073c9f5a19LL, 0x00000001LL}}},
      {"utf8mb4_sv_0900_ai_ci", {{0x3329a425d0f7f8d3LL, 0x00000001LL}}},
      {"utf8mb4_sv_0900_as_cs", {{0xcfb3e3073c9f5a19LL, 0x00000001LL}}},
      {"utf8mb4_swedish_ci", {{0x3acdfaa93364f55cLL, 0x0000055fLL}}},
      {"utf8mb4_tr_0900_ai_ci", {{0x7ea67be76364740fLL, 0x00000001LL}}},
      {"utf8mb4_tr_0900_as_cs", {{0xfa4556e24336675eLL, 0x00000001LL}}},
      {"utf8mb4_turkish_ci", {{0x3fb28acb6e515c9cLL, 0x0000055fLL}}},
      {"utf8mb4_unicode_520_ci", {{0x5c1f019a21e3d464LL, 0x0000055fLL}}},
      {"utf8mb4_unicode_ci", {{0x3acdfaa93364f55cLL, 0x0000055fLL}}},
      {"utf8mb4_vi_0900_ai_ci", {{0x3329a425d0f7f8d3LL, 0x00000001LL}}},
      {"utf8mb4_vi_0900_as_cs", {{0xcfb3e3073c9f5a19LL, 0x00000001LL}}},
      {"utf8mb4_vietnamese_ci", {{0x3acdfaa93364f55cLL, 0x0000055fLL}}},
      {"utf8mb4_zh_0900_as_cs", {{0x23c370d9ac589d1fLL, 0x00000001LL}}},
  };

  const string test_str =
      "This is a fairly long string. It does not contain any special "
      "characters since they are probably not universally supported across all "
      "character sets, but should at least be enough to make the nr1 value go "
      "up past the 32-bit mark.";

  for (CHARSET_INFO *cs : all_charsets) {
    if (cs == nullptr || !(cs->state & MY_CS_AVAILABLE)) {
      continue;  // Empty slot, or collation not flagged as available.
    }
    init_collation(cs->m_coll_name);

    // Re-encode the test string from utf8mb4 into the character set of the
    // collation under test; the conversion must not report any errors.
    char buf[4096];
    uint errors;
    const size_t len =
        my_convert(buf, sizeof(buf), cs, test_str.data(), test_str.size(),
                   &my_charset_utf8mb4_0900_ai_ci, &errors);
    ASSERT_EQ(0U, errors);

    // Fixed starting values, so that the result is reproducible.
    uint64 nr1 = 4, nr2 = 1;
    cs->coll->hash_sort(cs, pointer_cast<const uchar *>(buf), len, &nr1, &nr2);

    // Change this from false to true to output source code you can paste
    // into "expected" above.
    if (false) {
      printf("    {\"%s\", {{0x%016" PRIx64 "LL, 0x%" PRIx64 "LL}}},\n",
             cs->m_coll_name, nr1, nr2);
      continue;
    }

    // Look the golden value up once; find() never inserts, so a missing
    // collation cannot silently pick up a default-constructed entry.
    const auto golden = expected.find(cs->m_coll_name);
    ASSERT_TRUE(golden != expected.end())
        << "Character set " << cs->m_coll_name
        << " is missing in the database";
    SCOPED_TRACE(cs->m_coll_name);

    EXPECT_EQ(golden->second.hash_value.first, nr1);
    EXPECT_EQ(golden->second.hash_value.second, nr2);
  }
}

}  // namespace strnxfrm_unittest
